2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Belady's spill algorithm, version 2.
23 * @author Sebastian Hack, Matthias Braun, Daniel Grund
27 * The main differences to the original Belady are:
28 * - The workset is empty at the start of a block
29 * There is no attempt to fill it with variables which
30 * are not used in the block.
31 * - There is a global pass which tries to use the remaining
32 * capacity of the blocks to let global variables live through
43 #include "irnodeset.h"
45 #include "irprintf_t.h"
51 #include "iredges_t.h"
52 #include "irphase_t.h"
61 #include "besched_t.h"
65 #include "bechordal_t.h"
66 #include "bespilloptions.h"
67 #include "beloopana.h"
72 #include <libcore/lc_opts.h>
73 #include <libcore/lc_opts_enum.h>
74 #include <libcore/lc_timing.h>
83 #define DBG_WORKSET 128
84 #define DBG_GLOBAL 256
86 #define ALREADY_SPILLED_FACTOR 2
89 #define LIVE_END (DEAD-1)
90 #define REMAT_DIST (DEAD-2)
92 static int already_spilled_factor = 2;
93 static int remat_live_range_ext = 1;
94 static int global_pass_enabled = 1;
96 static const lc_opt_table_entry_t options[] = {
97 LC_OPT_ENT_INT ("asf", "already spilled factor", &already_spilled_factor),
98 LC_OPT_ENT_BOOL ("remat", "rematerializable ops get infinite long live ranges", &remat_live_range_ext),
99 LC_OPT_ENT_BOOL ("global", "enable/disable the global pass", &global_pass_enabled),
103 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
106 * An association between a node and a point in time.
108 typedef struct _loc_t {
109 ir_node *irn; /**< A node. */
110 unsigned time; /**< A use time.
111 In the global pass this is used
112 as the version number and not as a time.
113 Only to save space...
117 typedef struct _workset_t {
118 int len; /**< current length */
119 loc_t vals[0]; /**< inlined array of the values/distances in this working set */
122 typedef struct _belady_env_t {
126 const arch_env_t *arch;
127 const arch_register_class_t *cls;
131 ir_node **blocks; /**< Array of all blocks. */
132 int n_blocks; /**< Number of blocks in the graph. */
133 int n_regs; /**< number of regs in this reg-class */
134 workset_t *ws; /**< the main workset used while processing a block. ob-allocated */
135 ir_node *instr; /**< current instruction */
136 int instr_nr; /**< current instruction number (relative to block start) */
138 spill_env_t *senv; /**< see bespill.h */
139 bitset_t *spilled; /**< bitset to keep all the irns which have already been spilled. */
143 static int loc_compare(const void *a, const void *b)
147 return (p->time > q->time) - (p->time < q->time);
150 static INLINE void workset_print(const workset_t *w)
154 for(i = 0; i < w->len; ++i) {
155 ir_fprintf(stderr, "%+F %d\n", w->vals[i].irn, w->vals[i].time);
160 * Alloc a new workset on obstack @p ob with maximum size @p max
162 static INLINE workset_t *new_workset(belady_env_t *env, struct obstack *ob) {
164 size_t size = sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]);
165 res = obstack_alloc(ob, size);
166 memset(res, 0, size);
171 * Alloc a new instance on obstack and make it equal to @param ws
173 static INLINE workset_t *workset_clone(belady_env_t *env, struct obstack *ob, workset_t *ws) {
175 size_t size = sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]);
176 res = obstack_alloc(ob, size);
177 memcpy(res, ws, size);
182 * Do NOT alloc anything. Make @param tgt equal to @param src.
183 * returns @param tgt for convenience
185 static INLINE workset_t *workset_copy(belady_env_t *env, workset_t *tgt, workset_t *src) {
186 size_t size = sizeof(*src) + (env->n_regs)*sizeof(src->vals[0]);
187 memcpy(tgt, src, size);
192 * Overwrites the current content array of @param ws with the
193 * @param count locations given at memory @param locs.
194 * Set the length of @param ws to count.
196 static INLINE void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs) {
197 workset->len = count;
198 memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
202 * Inserts the value @p val into the workset, iff it is not
203 * already contained. The workset must not be full.
205 static INLINE void workset_insert(belady_env_t *env, workset_t *ws, ir_node *val) {
207 /* check for current regclass */
208 if (!arch_irn_consider_in_reg_alloc(env->arch, env->cls, val)) {
209 // DBG((dbg, DBG_WORKSET, "Skipped %+F\n", val));
213 /* check if val is already contained */
214 for(i=0; i<ws->len; ++i)
215 if (ws->vals[i].irn == val)
219 assert(ws->len < env->n_regs && "Workset already full!");
220 ws->vals[ws->len++].irn = val;
224 * Removes all entries from this workset
226 static INLINE void workset_clear(workset_t *ws) {
231 * Removes the value @p val from the workset if present.
233 static INLINE void workset_remove(workset_t *ws, ir_node *val) {
235 for(i=0; i<ws->len; ++i) {
236 if (ws->vals[i].irn == val) {
237 ws->vals[i] = ws->vals[--ws->len];
243 static INLINE int workset_get_index(const workset_t *ws, const ir_node *val) {
245 for(i=0; i<ws->len; ++i) {
246 if (ws->vals[i].irn == val)
254 * Iterates over all values in the working set.
255 * @p ws The workset to iterate
256 * @p v A variable to put the current value in
257 * @p i An integer for internal use
259 #define workset_foreach(ws, v, i) for(i=0; \
260 v=(i < ws->len) ? ws->vals[i].irn : NULL, i < ws->len; \
263 #define workset_set_time(ws, i, t) (ws)->vals[i].time=t
264 #define workset_get_time(ws, i) (ws)->vals[i].time
265 #define workset_set_length(ws, length) (ws)->len = length
266 #define workset_get_length(ws) ((ws)->len)
267 #define workset_get_val(ws, i) ((ws)->vals[i].irn)
268 #define workset_sort(ws) qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare);
269 #define workset_contains(ws, n) (workset_get_index(ws, n) >= 0)
271 typedef struct _bring_in_t bring_in_t;
273 typedef struct _block_info_t {
278 workset_t *ws_end; /**< The end set after the local belady pass. */
279 double exec_freq; /**< The execution frequency of this block. */
281 double reload_cost; /**< Cost of a reload in this block. */
282 ir_node *first_non_in; /**< First node in block which is not a phi. */
283 ir_node *last_ins; /**< The instruction before which end of
284 block reloads will be inserted. */
286 int pressure; /**< The amount of registers which remain free
287 in this block. This capacity can be used to let
288 global variables, transported into other blocks,
289 live through this block. */
291 int front_pressure; /**< The pressure right before the first
292 real (non-phi) node. At the beginning
293 of the global pass, this is 0. */
294 struct list_head br_head; /**< List head for all bring_in variables. */
295 int free_at_jump; /**< registers free at jump. */
299 static INLINE void *new_block_info(belady_env_t *bel, int id)
301 ir_node *bl = bel->blocks[id];
302 block_info_t *res = obstack_alloc(&bel->ob, sizeof(*res));
303 memset(res, 0, sizeof(res[0]));
304 res->first_non_in = NULL;
305 res->last_ins = NULL;
309 res->exec_freq = get_block_execfreq(bel->ef, bl);
310 res->reload_cost = bel->arch->isa->reload_cost * res->exec_freq;
311 res->free_at_jump = bel->n_regs;
312 INIT_LIST_HEAD(&res->br_head);
313 set_irn_link(bl, res);
317 #define get_block_info(block) ((block_info_t *)get_irn_link(block))
318 #define set_block_info(block, info) set_irn_link(block, info)
320 static INLINE ir_node *block_info_get_last_ins(block_info_t *bi)
323 bi->last_ins = be_get_end_of_block_insertion_point(bi->bl);
328 typedef struct _next_use_t {
329 unsigned is_first_use : 1; /**< Indicate that this use is the first
330 in the block. Needed to identify
331 transport in values for the global
333 sched_timestep_t step; /**< The time step of the use. */
335 struct _next_use_t *next; /**< The next use int this block
339 static void *next_use_init(ir_phase *phase, const ir_node *irn, void *old)
347 static void build_next_uses(block_info_t *bi)
351 sched_renumber(bi->bl);
353 phase_init(&bi->next_uses, "next uses", bi->bel->irg, PHASE_DEFAULT_GROWTH, next_use_init, NULL);
354 sched_foreach_reverse(bi->bl, irn) {
360 for (i = get_irn_arity(irn) - 1; i >= 0; --i) {
361 ir_node *op = get_irn_n(irn, i);
362 next_use_t *curr = phase_get_irn_data(&bi->next_uses, op);
363 next_use_t *use = phase_alloc(&bi->next_uses, sizeof(use[0]));
365 use->is_first_use = 1;
366 use->step = sched_get_time_step(irn);
371 curr->is_first_use = 0;
372 assert(curr->step >= use->step);
375 phase_set_irn_data(&bi->next_uses, op, use);
380 #define get_current_use(bi, irn) phase_get_irn_data(&(bi)->next_uses, (irn))
382 static INLINE void advance_current_use(block_info_t *bi, const ir_node *irn)
384 next_use_t *use = get_current_use(bi, irn);
387 phase_set_irn_data(&bi->next_uses, irn, use->next);
390 static __attribute__((unused)) int block_freq_gt(const void *a, const void *b)
392 const ir_node * const *p = a;
393 const ir_node * const *q = b;
394 block_info_t *pi = get_block_info(*p);
395 block_info_t *qi = get_block_info(*q);
396 double diff = qi->exec_freq - pi->exec_freq;
397 return (diff > 0) - (diff < 0);
400 static int block_freq_dfs_gt(const void *a, const void *b)
402 const ir_node * const *p = a;
403 const ir_node * const *q = b;
404 block_info_t *pi = get_block_info(*p);
405 block_info_t *qi = get_block_info(*q);
408 if ((pi->exec_freq > 1.0 && qi->exec_freq > 1.0)
409 || (pi->exec_freq <= 1.0 && qi->exec_freq <= 1.0)) {
411 const dfs_t *dfs = pi->bel->dfs;
412 int pp = dfs_get_post_num(dfs, pi->bl);
413 int pq = dfs_get_post_num(dfs, qi->bl);
417 diff = qi->exec_freq - pi->exec_freq;
418 return (diff > 0) - (diff < 0);
423 | __ ) _ __(_)_ __ __ _ |_ _|_ __
424 | _ \| '__| | '_ \ / _` | | || '_ \
425 | |_) | | | | | | | (_| | | || | | |
426 |____/|_| |_|_| |_|\__, | |___|_| |_|
429 Data structures to represent bring in variables.
433 ir_node *irn; /**< The node to bring in. */
434 block_info_t *bi; /**< The block to which bring in should happen. */
435 int pressure_so_far; /**< The maximal pressure till the first use of irn in bl. */
436 ir_node *first_use; /**< The first user of irn in bl. */
437 sched_timestep_t use_step; /**< Schedule sttep of the first use. */
439 int is_remat : 1; /**< Is rematerializable. */
440 int sect_pressure; /**< Offset to maximum pressure in block. */
441 struct list_head list;
442 struct list_head sect_list;
443 bring_in_t *sect_head;
446 static INLINE bring_in_t *new_bring_in(block_info_t *bi, ir_node *irn, const next_use_t *use)
448 bring_in_t *br = obstack_alloc(&bi->bel->ob, sizeof(br[0]));
452 br->first_use = use->irn;
453 br->use_step = use->step;
454 br->is_remat = be_is_rematerializable(bi->bel->senv, irn, use->irn);
455 br->pressure_so_far = bi->pressure;
456 br->sect_pressure = bi->front_pressure;
459 INIT_LIST_HEAD(&br->list);
460 INIT_LIST_HEAD(&br->sect_list);
461 list_add_tail(&br->list, &bi->br_head);
465 static int bring_in_cmp(const void *a, const void *b)
467 const bring_in_t *p = *(const bring_in_t * const *) a;
468 const bring_in_t *q = *(const bring_in_t * const *) b;
471 /* if one of both is a remat node, it will be done after the other. */
472 if (p->is_remat != q->is_remat)
473 return p->is_remat - q->is_remat;
475 /* in the same block, the one further in the front has to be processed first!
476 * Otherwise the front_pressure 'trick' is not exact. */
478 return p->use_step - q->use_step;
480 fp = p->bi->exec_freq;
481 fq = q->bi->exec_freq;
483 /* if both have the same frequency, inspect the frequency of the definition */
485 double fdp = get_block_info(get_nodes_block(p->irn))->exec_freq;
486 double fdq = get_block_info(get_nodes_block(q->irn))->exec_freq;
488 /* if the defs of both have the same freq, we go for reverse dfs post order. */
490 const dfs_t *dfs = p->bi->bel->dfs;
491 int pp = dfs_get_post_num(dfs, p->bi->bl);
492 int pq = dfs_get_post_num(dfs, q->bi->bl);
496 return (fdq > fdp) - (fdq < fdp);
499 return (fq > fp) - (fq < fp);
502 static INLINE unsigned get_curr_distance(block_info_t *bi, const ir_node *irn, int is_usage)
504 belady_env_t *env = bi->bel;
505 sched_timestep_t curr_step = sched_get_time_step(env->instr);
506 next_use_t *use = get_current_use(bi, irn);
507 int flags = arch_irn_get_flags(env->arch, irn);
509 assert(!(flags & arch_irn_flags_ignore));
511 /* We have to keep nonspillable nodes in the workingset */
512 if(flags & arch_irn_flags_dont_spill)
515 if (!is_usage && use && use->step == curr_step)
519 unsigned res = use->step - curr_step;
521 assert(use->step >= curr_step);
524 if (remat_live_range_ext && be_is_rematerializable(env->senv, irn, use->irn))
526 else if (bitset_contains_irn(env->spilled, irn))
527 res *= already_spilled_factor;
533 return be_is_live_end(env->lv, bi->bl, irn) ? LIVE_END : DEAD;
536 static INLINE int is_local_phi(const ir_node *bl, const ir_node *irn)
538 return is_Phi(irn) && get_nodes_block(irn) == bl;
542 * Check, if the value is something that is transported into a block.
543 * That is, the value is defined elsewhere or defined by a Phi in the block.
544 * @param env The belady environment.
545 * @param bl The block in question.
546 * @param irn The node in question.
547 * @return 1, if node is something transported into @p bl, 0 if not.
548 * @note The function will only give correct answers in the case
549 * where @p irn is unsed in the block @p bl which is always
550 * the case in our usage scenario.
552 static INLINE int is_transport_in(const ir_node *bl, const ir_node *irn)
554 return get_nodes_block(irn) != bl || is_Phi(irn);
558 * Performs the actions necessary to grant the request that:
559 * - new_vals can be held in registers
560 * - as few as possible other values are disposed
561 * - the worst values get disposed
563 * @p is_usage indicates that the values in new_vals are used (not defined)
564 * In this case reloads must be performed
566 static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) {
567 belady_env_t *env = bi->bel;
568 workset_t *ws = env->ws;
569 ir_node **to_insert = alloca(env->n_regs * sizeof(to_insert[0]));
571 int i, len, max_allowed, demand, iter;
575 1. Identify the number of needed slots and the values to reload
578 workset_foreach(new_vals, val, iter) {
579 /* mark value as used */
581 if (! workset_contains(ws, val)) {
582 DBG((dbg, DBG_DECIDE, "\t\tinsert %+F\n", val));
583 to_insert[demand++] = val;
585 next_use_t *use = get_current_use(bi, val);
588 * if we use a value which is transported in this block, i.e. a
589 * phi defined here or a live in, for the first time, we check
590 * if there is room for that guy to survive from the block's
591 * entrance to here or not.
594 assert(sched_get_time_step(env->instr) == (int) use->step);
595 if (is_transport_in(bi->bl, val) && use->is_first_use) {
596 bring_in_t *bri = new_bring_in(bi, val, use);
597 bri->first_use = env->instr;
599 /* reset the section pressure, since a new section starts. */
600 bi->front_pressure = 0;
602 DBG((dbg, DBG_DECIDE, "\t\tbring in node %+F, pressure %d:\n", val, bi->pressure));
603 DBG((dbg, DBG_DECIDE, "\t\tno reload. must be considered at block start\n"));
607 bitset_add_irn(env->spilled, val);
608 DBG((dbg, DBG_SPILL, "\t\tReload %+F before %+F\n", val, env->instr));
609 be_add_reload(env->senv, val, env->instr, env->cls, 1);
613 assert(is_usage && "Defined value already in workset?!?");
614 DBG((dbg, DBG_DECIDE, "\t\tskip %+F\n", val));
617 DBG((dbg, DBG_DECIDE, "\t\tdemand = %d\n", demand));
620 2. Make room for at least 'demand' slots
622 len = workset_get_length(ws);
623 max_allowed = env->n_regs - demand;
625 /* Only make more free room if we do not have enough */
626 if (len > max_allowed) {
627 DBG((dbg, DBG_DECIDE, "\t\tdisposing %d values\n", len - max_allowed));
629 /* get current next-use distance */
630 for (i = 0; i < ws->len; ++i) {
631 ir_node *val = workset_get_val(ws, i);
632 unsigned dist = get_curr_distance(bi, val, is_usage);
633 workset_set_time(ws, i, dist);
636 /* sort entries by increasing nextuse-distance*/
639 /* kill the last 'demand' entries in the array */
640 workset_set_length(ws, max_allowed);
644 3. Insert the new values into the workset
645 Also, we update the pressure in the block info.
646 That is important for the global pass to decide
647 how many values can live through the block.
649 for (i = 0; i < demand; ++i)
650 workset_insert(env, env->ws, to_insert[i]);
652 /* TODO: simplify this expression? */
653 bi->pressure = MAX(bi->pressure, workset_get_length(env->ws));
654 bi->front_pressure = MAX(bi->front_pressure, workset_get_length(env->ws));
658 * For the given block @p block, decide for each values
659 * whether it is used from a register or is reloaded
662 static void belady(belady_env_t *env, int id) {
663 block_info_t *block_info = new_block_info(env, id);
664 const ir_node *block = block_info->bl;
670 DBG((dbg, DBG_WSETS, "Belady on %+F\n", block_info->bl));
671 new_vals = new_workset(env, &env->ob);
672 workset_clear(env->ws);
674 /* build the next use information for this block. */
675 build_next_uses(block_info);
678 block_info->first_non_in = NULL;
680 /* process the block from start to end */
681 sched_foreach(block, irn) {
683 assert(workset_get_length(env->ws) <= env->n_regs && "Too much values in workset!");
685 /* projs are handled with the tuple value.
686 * Phis are no real instr (see insert_starters())
687 * instr_nr does not increase */
688 if (is_Proj(irn) || is_Phi(irn))
690 DBG((dbg, DBG_DECIDE, "\t%+F\n", irn));
692 if (!block_info->first_non_in)
693 block_info->first_non_in = irn;
695 /* set instruction in the workset */
698 /* allocate all values _used_ by this instruction */
699 workset_clear(new_vals);
700 for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
701 workset_insert(env, new_vals, get_irn_n(irn, i));
703 DBG((dbg, DBG_DECIDE, "\t* uses\n"));
704 displace(block_info, new_vals, 1);
707 * set all used variables to the next use in their next_use_t list
708 * Also, kill all dead variables from the workset. They are only
709 * augmenting the pressure. Note, that a variable is dead
710 * if it has no further use in this block and is *not* live end
712 for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
713 ir_node *op = get_irn_n(irn, i);
714 next_use_t *use = get_current_use(block_info, op);
717 if (!use->next && !be_is_live_end(env->lv, block, op))
718 workset_remove(env->ws, op);
720 advance_current_use(block_info, op);
723 /* allocate all values _defined_ by this instruction */
724 workset_clear(new_vals);
725 if (get_irn_mode(irn) == mode_T) { /* special handling for tuples and projs */
726 const ir_edge_t *edge;
728 foreach_out_edge(irn, edge) {
729 ir_node *proj = get_edge_src_irn(edge);
730 workset_insert(env, new_vals, proj);
733 workset_insert(env, new_vals, irn);
735 DBG((dbg, DBG_DECIDE, "\t* defs\n"));
736 displace(block_info, new_vals, 0);
738 if (is_op_forking(get_irn_op(env->instr))) {
739 for (i = get_irn_arity(env->instr) - 1; i >= 0; --i) {
740 ir_node *op = get_irn_n(env->instr, i);
741 block_info->free_at_jump -= arch_irn_consider_in_reg_alloc(env->arch, env->cls, op);
748 phase_free(&block_info->next_uses);
750 /* Remember end-workset for this block */
751 block_info->ws_end = workset_clone(env, &env->ob, env->ws);
752 DBG((dbg, DBG_WSETS, "End workset for %+F:\n", block));
753 workset_foreach(block_info->ws_end, irn, iter)
754 DBG((dbg, DBG_WSETS, " %+F (%u)\n", irn, workset_get_time(block_info->ws_end, iter)));
755 DBG((dbg, DBG_WSETS, "Max pressure in block: %d\n", block_info->pressure));
757 /* now, initialize the front pressure to 0. */
758 block_info->front_pressure = 0;
763 |_ _| |__ ___ __ _| | ___ | |__ __ _| | | _ \ __ _ _ __| |_
764 | | | '_ \ / _ \ / _` | |/ _ \| '_ \ / _` | | | |_) / _` | '__| __|
765 | | | | | | __/ | (_| | | (_) | |_) | (_| | | | __/ (_| | | | |_
766 |_| |_| |_|\___| \__, |_|\___/|_.__/ \__,_|_| |_| \__,_|_| \__|
771 #define workset_set_version(ws, i, t) ((ws)->vals[(i)].time = (t))
772 #define workset_get_version(ws, i) ((ws)->vals[(i)].time)
774 #define ver_oldest (0)
775 #define ver_youngest ((unsigned) -1)
776 #define ver_make_newer(v) ((v) + 1)
777 #define ver_is_older(v, w) ((v) < (w))
778 #define ver_is_younger(v, w) ((v) > (w))
786 typedef struct _block_state_t {
787 struct _block_state_t *next;
788 struct _block_state_t *next_intern;
791 workset_t *end_state;
794 typedef struct _irn_action_t {
795 struct _irn_action_t *next;
801 typedef struct _global_end_state_t {
809 unsigned *bs_tops_vers;
810 block_state_t **bs_tops;
811 block_state_t *bs_top;
812 irn_action_t *ia_top;
813 } global_end_state_t;
817 block_state_t *bs_top;
818 irn_action_t *ia_top;
821 static INLINE block_state_t *get_block_state(global_end_state_t *ges, const block_info_t *bi)
824 assert(!ver_is_younger(ges->bs_tops_vers[id], ges->version));
825 return ver_is_older(ges->bs_tops_vers[id], ges->version) ? NULL : ges->bs_tops[bi->id];
828 static INLINE const workset_t *get_end_state(global_end_state_t *ges, block_info_t *bi)
830 block_state_t *bs = get_block_state(ges, bi);
831 return bs ? bs->end_state : bi->ws_end;
834 static block_state_t *new_block_state(global_end_state_t *ges, block_info_t *bi)
836 block_state_t *bs = get_block_state(ges, bi);
837 block_state_t *nw = obstack_alloc(&ges->obst, sizeof(nw[0]));
839 nw->next_intern = bs;
840 nw->next = ges->bs_top;
844 nw->pressure = bs->pressure;
845 nw->end_state = workset_clone(ges->env, &ges->obst, bs->end_state);
848 nw->pressure = bi->pressure;
849 nw->end_state = workset_clone(ges->env, &ges->obst, bi->ws_end);
853 ges->bs_tops[bi->id] = nw;
854 ges->bs_tops_vers[bi->id] = ges->version;
858 static irn_action_t *new_irn_action(global_end_state_t *ges, ir_node *irn, const ir_node *bl)
860 irn_action_t *ia = obstack_alloc(&ges->obst, sizeof(ia[0]));
864 ia->act = irn_act_none;
865 ia->next = ges->ia_top;
870 static INLINE rollback_info_t trans_begin(global_end_state_t *ges)
873 rb.obst_level = obstack_base(&ges->obst);
874 rb.bs_top = ges->bs_top;
875 rb.ia_top = ges->ia_top;
879 static INLINE void trans_rollback(global_end_state_t *ges, rollback_info_t *rb)
883 /* unwind all the stacks indiced with the block number */
884 for (bs = ges->bs_top; bs != rb->bs_top; bs = bs->next) {
885 unsigned id = bs->bi->id;
886 ges->bs_tops[id] = bs->next_intern;
889 ges->ia_top = rb->ia_top;
890 ges->bs_top = rb->bs_top;
891 obstack_free(&ges->obst, rb->obst_level);
895 static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level);
897 static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level)
899 block_info_t *bi = get_block_info(bl);
900 const workset_t *end = get_end_state(ges, bi);
904 DBG((dbg, DBG_GLOBAL, "\t%2Dcan make avail %+F at end of %+F\n", level, irn, bl));
907 * to make the value available at end,
908 * we have several cases here.
910 * - we already visited that block.
911 * - If the value is in the final end set, return 0.
912 * somebody else already allocated it there.
913 * - If not and the final end set is already full,
914 * we cannot make the value available at the end
915 * of this block. return INFINITY.
916 * - Else (value not in final end set and there is room):
917 * 1) The value is in a register at the end of the local Belady pass.
918 * Allocate a slot in the final end set and return 0.
919 * 2) The value is not in the Belady end set:
920 * If the block's capacity is < k then check what it costs
921 * to transport the value from upper blocks to this block.
922 * Compare that against the reload cost in this block. If
923 * cheaper, do the other thing. If not, reload it here.
926 /* if the end set contains it already, it is in a reg and it costs nothing
927 * to load it to one. */
928 index = workset_get_index(end, irn);
930 unsigned ver = workset_get_version(end, index);
931 DBG((dbg, DBG_GLOBAL, "\t%2Dnode is in the end set and is %s fixed\n",
932 level, ver_is_older(ver, ges->version) ? "already" : "not yet"));
935 * if the version is older, the value is already fixed
936 * and cannot be removed from the end set.
938 * If not, we will create a new block state for that block since
939 * we modify it by giving the end state a new version.
941 if (ver_is_younger(ver, ges->version)) {
942 block_state_t *bs = new_block_state(ges, bi);
943 workset_set_version(bs->end_state, index, ges->version);
951 * Now we have two options:
952 * 1) Reload the value at the end of the block.
953 * Therefore, perhaps, we have to erase another one from the workset.
954 * This may only be done if it has not been fixed.
955 * Since fixed means that a previous pass has decided that that value
956 * *has* to stay in the end set.
957 * 2) we can try, if the capacity of the block allows it, to let
958 * the value live through the block and make it available at
961 * First, we test the local (reload in this block) alternative
962 * and compare against the other alternative.
963 * Of course, we chose the cheaper one.
967 int n_regs = bi->free_at_jump;
968 int len = workset_get_length(end);
975 * look if there is room in the end array
976 * for the variable. Note that this does not
977 * mean that the var can live through the block.
978 * There is just room at the *end*
981 DBG((dbg, DBG_GLOBAL, "\t%2Dthe end set has %d free slots\n", level, n_regs - len));
984 for (i = 0; i < len; ++i) {
985 unsigned ver = workset_get_version(end, i);
986 if (ver_is_younger(ver, ges->version))
991 DBG((dbg, DBG_GLOBAL, "\t%2D%+F (slot %d) can be erased from the end set\n",
992 level, end->vals[i].irn, i));
998 * finally there is some room. we can at least reload the value.
999 * but we will try to let ot live through anyhow.
1002 irn_action_t *vs = new_irn_action(ges, irn, bi->bl);
1003 block_state_t *bs = new_block_state(ges, bi);
1004 workset_t *end = bs->end_state;
1005 ir_node *ins_before = block_info_get_last_ins(bi);
1006 double reload_here = be_get_reload_costs(bi->bel->senv, irn, ins_before);
1007 int pressure_ok = bs->pressure < n_regs;
1009 if (reload_here < bi->reload_cost)
1013 * No matter what we do, the value will be in the end set
1014 * if the block from now on (of course only regarding the
1015 * current state). Enter it and set the new length
1018 end->vals[slot].irn = irn;
1019 workset_set_version(end, slot, ges->version);
1020 workset_set_length(end, MAX(workset_get_length(end), slot + 1));
1022 vs->act = irn_act_reload;
1025 DBG((dbg, DBG_GLOBAL, "\t%2Dthere is a free slot. capacity=%d, reload here=%f, pressure %s\n",
1026 level, n_regs - bs->pressure, reload_here, pressure_ok ? "ok" : "insufficient"));
1029 /* look if we can bring the value in. */
1030 if (pressure_ok && reload_here > 0.0) {
1031 rollback_info_t rb = trans_begin(ges);
1032 double new_limit = MIN(reload_here, limit);
1034 vs->act = irn_act_live_through;
1036 res = can_bring_in(ges, bl, irn, new_limit, level + 1);
1039 * if bring in is too expensive re-adjust the pressure
1040 * and roll back the state
1042 if (res >= reload_here) {
1044 vs->act = irn_act_reload;
1045 trans_rollback(ges, &rb);
1051 DBG((dbg, DBG_GLOBAL, "\t%2D%s\n", level,
1052 vs->act == irn_act_reload ? "reloading" : "bringing in"));
1057 DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, res));
1061 static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level)
1063 belady_env_t *env = ges->env;
1064 double glob_costs = HUGE_VAL;
1066 DBG((dbg, DBG_GLOBAL, "\t%2Dcan bring in (max %f) for %+F at block %+F\n", level, limit, irn, bl));
1068 if (is_transport_in(bl, irn)) {
1069 int i, n = get_irn_arity(bl);
1070 ir_node **nodes = alloca(get_irn_arity(bl) * sizeof(nodes[0]));
1071 rollback_info_t rb = trans_begin(ges);
1074 for (i = 0; i < n; ++i) {
1075 ir_node *pr = get_Block_cfgpred_block(bl, i);
1076 ir_node *op = is_local_phi(bl, irn) ? get_irn_n(irn, i) : irn;
1080 * there might by unknwons as operands of phis in that case
1081 * we set the costs to zero, since they won't get spilled.
1083 if (arch_irn_consider_in_reg_alloc(env->arch, env->cls, op))
1084 c = can_make_available_at_end(ges, pr, op, limit - glob_costs, level + 1);
1090 if (glob_costs >= limit) {
1091 glob_costs = HUGE_VAL;
1092 trans_rollback(ges, &rb);
1099 DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, glob_costs));
1103 static void materialize_and_commit_end_state(global_end_state_t *ges)
1105 belady_env_t *env = ges->env;
1109 DBG((dbg, DBG_GLOBAL, "\tmaterializing\n"));
1112 * Perform all the variable actions.
1114 for (ia = ges->ia_top; ia != NULL; ia = ia->next) {
1116 case irn_act_live_through:
1118 block_info_t *bi = get_block_info(ia->bl);
1121 if (is_local_phi(ia->bl, ia->irn)) {
1122 bitset_add_irn(ges->succ_phis, ia->irn);
1123 DBG((dbg, DBG_GLOBAL, "\t\tlive through phi kept alive: %+F\n", ia->irn));
1126 list_for_each_entry_reverse(bring_in_t, iter, &bi->br_head, list)
1127 ++iter->sect_pressure;
1128 ++bi->front_pressure;
1131 case irn_act_reload:
1132 be_add_reload_at_end(env->senv, ia->irn, ia->bl, env->cls, 1);
1133 DBG((dbg, DBG_GLOBAL, "\t\tadding reload of %+F at end of %+F\n", ia->irn, ia->bl));
1136 DBG((dbg, DBG_GLOBAL, "\t\t%+F is in the end set of %+F\n", ia->irn, ia->bl));
1141 * Commit the block end states
1143 for (bs = ges->bs_top; bs != NULL; bs = bs->next) {
1144 block_info_t *bi = bs->bi;
1146 if (!bitset_is_set(ges->committed, bi->id)) {
1147 DBG((dbg, DBG_GLOBAL, "\t\tcommiting workset of %+F with version %x\n", bi->bl, ges->version));
1148 // bes->bs->end_state->vals[idx].version = ges->version;
1149 workset_copy(env, bi->ws_end, bs->end_state);
1150 DBG((dbg, DBG_GLOBAL, "\t\told pressure: %d, new pressure: %d, end length: %d\n",
1151 bi->pressure, bs->pressure, workset_get_length(bs->end_state)));
1152 bi->pressure = bs->pressure;
1153 bitset_set(ges->committed, bi->id);
1157 /* clear the committed bitset. the next call is expecting it. */
1158 bitset_clear_all(ges->committed);
1161 static ir_node *better_spilled_here(const bring_in_t *br)
1163 const block_info_t *bi = br->bi;
1164 double spill_ef = get_block_info(get_nodes_block(br->irn))->exec_freq;
1167 * If the bring in node is a phi in the bring in block,
1168 * we look at all definitions and sum up their execution frequencies,
1169 * since spills will be placed there.
1170 * (except for the case where an operand is also a phi which is spilled :-( )
1171 * If that cost is higher than spilling the phi in that block, we opt for
1172 * bringing the phi into the block and spill it there.
1174 if (is_local_phi(bi->bl, br->irn)) {
1175 ir_node *bl = bi->bl;
1179 for (i = get_Block_n_cfgpreds(bl) - 1; i >= 0; --i)
1180 spill_ef += get_block_info(get_Block_cfgpred_block(bl, i))->exec_freq;
1183 return bi->exec_freq < spill_ef ? sched_prev(bi->first_non_in) : NULL;
1186 static int get_max_pressure_so_far(const block_info_t *bi, const bring_in_t *br)
1188 const struct list_head *l;
1191 assert(br->bi == bi);
1192 for (l = &br->list; l != &bi->br_head; l = l->prev) {
1193 br = list_entry(l, bring_in_t, list);
1194 res = MAX(res, br->sect_pressure);
1197 /* finally consider the front pressure distance and add the reference line (the global block pressure) */
1198 return MAX(res, bi->front_pressure);
/* Fetch the last bring-in entry of a block (tail of the br_head list). */
1201 #define block_last_bring_in(bi) list_entry((bi)->br_head.prev, bring_in_t, list)
/**
 * Compute the overall maximal register pressure of a block: the larger of
 * the recorded block pressure and the maximal section pressure up to the
 * last bring-in entry.
 */
1204 static int get_block_max_pressure(const block_info_t *bi)
1206 int max = get_max_pressure_so_far(bi, block_last_bring_in(bi));
1207 return MAX(bi->pressure, max);
1212 * Try to bring a variable into a block.
1213 * @param ges The state of all end sets.
1214 * @param br  The bring-in record (block, variable and its first use).
1217 static void optimize_variable(global_end_state_t *ges, bring_in_t *br)
1219 block_info_t *bi = br->bi;
1220 ir_node *irn = br->irn;
1221 ir_node *bl = bi->bl;
1222 belady_env_t *env = ges->env;
/* remember the obstack level so all transient state can be freed at the end */
1223 void *reset_level = obstack_base(&ges->obst);
1224 int k = env->n_regs;
1225 int pressure_upto_use = get_max_pressure_so_far(bi, br);
1226 int front_pressure = bi->front_pressure;
1227 ir_node *better_spill_loc = NULL;
1229 assert(front_pressure <= k);
1230 assert(pressure_upto_use <= k);
1232 DBG((dbg, DBG_GLOBAL, "fixing %+F at %+F (%f), front pr: %d, pr to use: %d, first use: %x\n",
1233 irn, bl, bi->exec_freq, front_pressure, pressure_upto_use, br->first_use));
1235 // assert(!is_local_phi(bl, irn) || !bitset_contains_irn(ges->succ_phis, irn));
1238 * if we cannot bring the value to the use, let's see if it would be worthwhile
1239 * to bring the value to the beginning of the block to have a better spill
1242 * better_spilled_here will return a node where the value can be spilled after
1243 * or NULL if this block does not provide a better spill location.
1246 if (pressure_upto_use >= k && front_pressure < k && !bitset_contains_irn(env->spilled, irn))
1247 better_spill_loc = better_spilled_here(br);
1251 * If either we can bring the value to the use or we should try
1252 * to bring it here to do the spill here, let's try to bring it in.
1254 if (better_spill_loc || pressure_upto_use < k) {
1256 double bring_in_costs, local_costs;
1257 rollback_info_t trans;
1260 /* process all variables which shall be in a reg at
1261 * the beginning of the block in the order of the next use. */
1262 local_costs = be_get_reload_costs(env->senv, irn, br->first_use);
1264 /* reset the lists */
1268 /* if the variable will live into this block, we must adapt the pressure.
1269 * The new pressure is the MAX of:
1270 * 1) the total block pressure
1271 * 2) the pressure so far + the front pressure increase + 1
1273 * If the second is larger than the first,
1274 * we have to increment the total block pressure and hence
1275 * save the old pressure to restore it in case of failing to
1276 * bring the variable into the block in a register.
1278 trans = trans_begin(ges);
1279 bs = new_block_state(ges, bi);
1280 pressure_inc = MAX(bs->pressure, better_spill_loc ? front_pressure : pressure_upto_use + 1);
1281 bs->pressure = pressure_inc;
1284 assert(bi->pressure <= k);
1285 DBG((dbg, DBG_GLOBAL, "\ttrans in var %+F, version %x\n", irn, ges->version));
/* compute the global cost of transporting the value into this block */
1286 bring_in_costs = can_bring_in(ges, bl, irn, local_costs, 1);
1287 DBG((dbg, DBG_GLOBAL, "\tbring in: %f, local: %f\n", bring_in_costs, local_costs));
1290 * Following cases can now occur:
1291 * 1) There is room and costs ok
1292 * 2) Cannot bring to use but can spill at begin and costs are ok
1293 * 3) neither of both worked.
1295 * following actions can be taken:
1297 * b) mark phi as succeeded if node was phi
1298 * c) insert reload at use location
1299 * d) give a spill location hint
1301 * this is the case/action matrix
1309 /* the costs were acceptable... */
1310 if (bring_in_costs < local_costs) {
1315 * case 1 and first part of case 2:
1316 * commit all the changes done. this manifests the bring-in action.
1317 * if the transport-in was a phi (that is actually used in block)
1318 * mark it in the succ_phis set to *not* phi spill it.
1320 materialize_and_commit_end_state(ges);
1321 if (is_local_phi(bl, irn))
1322 bitset_add_irn(ges->succ_phis, irn);
/* NOTE(review): format string has no conversion specifier for the extra
 * pressure_inc argument (and lacks a trailing \n) — argument is ignored. */
1324 DBG((dbg, DBG_GLOBAL, "\t-> bring it in.", pressure_inc));
1326 /* second half of case 2 */
1327 if (pressure_upto_use >= k) {
1328 DBG((dbg, DBG_GLOBAL, "\t-> use blocked. local reload: %+F, try spill at: %+F\n",
1329 br->first_use, better_spill_loc));
1330 be_add_reload(env->senv, irn, br->first_use, env->cls, 1);
1331 be_add_spill(env->senv, irn, sched_next(better_spill_loc));
1335 * go from the last bring in use to the first and add all the variables
1336 * which additionally live through the block to their pressure.
1337 * at the point where the actually treated use is, we have to increase
1338 * the pressure by one more as the brought in value starts to count.
1339 * Finally, adjust the front pressure as well.
1342 list_for_each_entry_reverse(bring_in_t, iter, &bi->br_head, list) {
1344 pressure_inc += pressure_upto_use < k;
1345 iter->sect_pressure += pressure_inc;
1346 check = MAX(check, iter->sect_pressure);
1347 DBG((dbg, DBG_GLOBAL, "\tinc section pressure of %+F by %d to %d\n", iter->first_use, pressure_inc, iter->sect_pressure));
1349 bi->front_pressure += pressure_inc;
1350 assert(MAX(check, bi->front_pressure) <= bi->pressure);
1351 DBG((dbg, DBG_GLOBAL, "\t-> result: p: %d, fp: %d\n", bi->pressure, bi->front_pressure));
1354 /* case 3: nothing worked. insert normal reload and rollback. */
1356 DBG((dbg, DBG_GLOBAL, "\t-> bring in was too expensive. local reload: %+F\n", br->first_use));
1357 be_add_reload(env->senv, irn, br->first_use, env->cls, 1);
1358 bitset_add_irn(env->spilled, irn);
1359 trans_rollback(ges, &trans);
1363 /* there was no opportunity for optimization at all. reload and be sad ... */
1365 DBG((dbg, DBG_GLOBAL, "\t-> can\'t do anything but reload before %+F\n", br->first_use));
1366 be_add_reload(env->senv, irn, br->first_use, env->cls, 1);
1367 bitset_add_irn(env->spilled, irn);
1370 DBG((dbg, DBG_GLOBAL, "\n"));
1372 /* reset the obstack and create a new version. */
1373 obstack_free(&ges->obst, reset_level);
1374 ges->version = ver_make_newer(ges->version);
/**
 * Collect all bring-in entries of all blocks into one NULL-terminated array
 * and sort it (via bring_in_cmp) to determine the processing order of the
 * global pass.
 * @param env  The belady environment.
 * @return NULL-terminated, sorted array of bring_in_t pointers, allocated on
 *         env->ob (returned on a line not visible in this chunk).
 * NOTE(review): the declarations of `res`, `n`, `i` and `elm` are on lines
 * not visible in this chunk.
 */
1377 static bring_in_t **determine_global_order(belady_env_t *env)
1383 for (i = env->n_blocks - 1; i >= 0; --i) {
1384 block_info_t *bi = get_block_info(env->blocks[i]);
1385 list_for_each_entry(bring_in_t, elm, &bi->br_head, list) {
1386 obstack_ptr_grow(&env->ob, elm);
/* NULL-terminate so callers can iterate without a separate count */
1391 obstack_ptr_grow(&env->ob, NULL);
1392 res = obstack_finish(&env->ob);
1393 qsort(res, n, sizeof(res[0]), bring_in_cmp);
/**
 * The global pass: after the per-block belady runs, try to use the blocks'
 * remaining register capacity to keep variables in registers across block
 * borders, then phi-spill all phis that could not be kept.
 * @param env  The belady environment.
 */
1400 static void global_assign(belady_env_t *env)
1402 global_end_state_t ges;
1407 * sort the blocks according to execution frequency.
1408 * That's not necessary for belady() but for the global pass later on.
1410 qsort(env->blocks, env->n_blocks, sizeof(env->blocks[0]), block_freq_dfs_gt);
1412 memset(&ges, 0, sizeof(ges));
1413 obstack_init(&ges.obst);
1415 ges.version = ver_make_newer(ver_oldest);
1416 ges.succ_phis = bitset_irg_obstack_alloc(&ges.obst, env->irg);
1417 ges.committed = bitset_obstack_alloc(&ges.obst, env->n_blocks);
1418 ges.bs_tops = obstack_alloc(&ges.obst, sizeof(ges.bs_tops[0]) * env->n_blocks);
1419 ges.bs_tops_vers = obstack_alloc(&ges.obst, sizeof(ges.bs_tops_vers[0]) * env->n_blocks);
1421 /* invalidate all state stack pointer versions */
1422 for (i = 0; i < env->n_blocks; ++i) {
1423 block_info_t *bi = get_block_info(env->blocks[i]);
1424 ges.bs_tops_vers[i] = ver_oldest;
1426 /* Set all block end sets entries to the youngest version */
1427 for (j = workset_get_length(bi->ws_end) - 1; j >= 0; --j)
1428 workset_set_version(bi->ws_end, j, ver_youngest);
1431 /* determine order and optimize the variables */
1432 for (br = determine_global_order(env); *br; ++br)
1433 optimize_variable(&ges, *br);
1436 * Now we spill phis which cannot be kept since they were replaced
1437 * by reloads at the block entrances.
1439 for (i = 0; i < env->n_blocks; ++i) {
1440 ir_node *bl = env->blocks[i];
/* NOTE(review): the early-exit for non-Phi nodes inside this loop is on
 * lines not visible in this chunk. */
1443 sched_foreach(bl, irn) {
1447 if (arch_irn_consider_in_reg_alloc(env->arch, env->cls, irn)
1448 && !bitset_contains_irn(ges.succ_phis, irn))
1449 be_spill_phi(env->senv, irn);
/**
 * Block-walker callback: record every block of the graph on the
 * environment's obstack (used later via obstack_finish to build the
 * env->blocks array).
 * @param bl    The visited block.
 * @param data  The belady_env_t (passed as void* by the walker).
 * NOTE(review): the block-count increment appears to be on a line not
 * visible in this chunk.
 */
1454 static void collect_blocks(ir_node *bl, void *data)
1456 belady_env_t *env = data;
1458 obstack_ptr_grow(&env->ob, bl);
1462 * Do spilling for a register class on a graph using the belady heuristic.
1463 * In the transformed graph, the register pressure never exceeds the number
1464 * of available registers.
1466 * @param birg The backend graph
1467 * @param cls The register class to spill
1469 void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls)
1471 ir_graph *irg = be_get_birg_irg(birg);
1475 /* some special classes contain only ignore regs, nothing to do then */
1476 n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
1480 be_clear_links(irg);
1482 /* init belady env */
1483 obstack_init(&env.ob);
1485 env.arch = be_get_birg_arch_env(birg);
1487 env.lv = be_get_birg_liveness(birg);
1488 env.dfs = env.lv->dfs;
1489 env.n_regs = n_regs;
1490 env.ws = new_workset(&env, &env.ob);
1491 env.senv = be_new_spill_env(birg);
1492 env.ef = be_get_birg_exec_freq(birg);
1493 env.spilled = bitset_irg_obstack_alloc(&env.ob, irg);
/* gather all blocks into a NULL-terminated array on the obstack */
1496 irg_block_walk_graph(irg, NULL, collect_blocks, &env);
1497 obstack_ptr_grow(&env.ob, NULL);
1498 env.blocks = obstack_finish(&env.ob);
1500 /* renumbering in the blocks gives nicer debug output as numbers are smaller. */
1501 #ifdef DEBUG_libfirm
1502 for (i = 0; i < env.n_blocks; ++i)
1503 sched_renumber(env.blocks[i]);
1506 /* Fix high register pressure in blocks with belady algorithm */
1507 for (i = 0; i < env.n_blocks; ++i)
/* then run the global pass to keep values live across block borders */
1510 global_assign(&env);
1512 /* Insert spill/reload nodes into the graph and fix usages */
1513 be_insert_spills_reloads(env.senv);
/* release spill environment and all obstack-allocated per-pass data */
1516 be_delete_spill_env(env.senv);
1518 obstack_free(&env.ob, NULL);
/**
 * Module constructor: register the belady2 spiller and its command-line
 * options under be.spill.belady2, and set up the debug channel.
 * NOTE(review): the initializer body of belady_spiller is on lines not
 * visible in this chunk.
 */
1521 void be_init_spillbelady2(void)
1523 lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
1524 lc_opt_entry_t *spill_grp = lc_opt_get_grp(be_grp, "spill");
1525 lc_opt_entry_t *bel2_grp = lc_opt_get_grp(spill_grp, "belady2");
1527 static be_spiller_t belady_spiller = {
1531 lc_opt_add_table(bel2_grp, options);
1532 be_register_spiller("belady2", &belady_spiller);
1533 FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady2");
/* run be_init_spillbelady2 automatically at library initialization */
1536 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady2);