 * Copyright (C) 1995-2007 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * @brief Belady's spill algorithm, version 2.
23 * @author Sebastian Hack, Matthias Braun, Daniel Grund
27 * The main differences to the original Belady are:
28 * - The workset is empty at the start of a block
29 * There is no attempt to fill it with variables which
30 * are not used in the block.
31 * - There is a global pass which tries to use the remaining
32 * capacity of the blocks to let global variables live through
43 #include "irnodeset.h"
45 #include "irprintf_t.h"
51 #include "iredges_t.h"
52 #include "irphase_t.h"
61 #include "bespillbelady.h"
62 #include "besched_t.h"
66 #include "bechordal_t.h"
67 #include "bespilloptions.h"
68 #include "beloopana.h"
72 #include <libcore/lc_opts.h>
73 #include <libcore/lc_opts_enum.h>
74 #include <libcore/lc_timing.h>
/* Additional debug-channel masks used by this spiller. */
#define DBG_WORKSET 128
#define DBG_GLOBAL 256

/* Default factor by which the next-use distance of an already-spilled
 * value is stretched (run-time value lives in already_spilled_factor). */
#define ALREADY_SPILLED_FACTOR 2

/* Sentinel "distances". NOTE(review): DEAD is defined on a line not
 * visible in this fragment; these are derived from it. */
#define LIVE_END (DEAD-1)
#define REMAT_DIST (DEAD-2)

/* Run-time tunables, exposed via the option table below. */
static int already_spilled_factor = 2;
static int remat_live_range_ext = 1;
static int global_pass_enabled = 1;

static const lc_opt_table_entry_t options[] = {
	LC_OPT_ENT_INT ("asf", "already spilled factor", &already_spilled_factor),
	LC_OPT_ENT_BOOL ("remat", "rematerializable ops get infinite long live ranges", &remat_live_range_ext),
	LC_OPT_ENT_BOOL ("global", "enable/disable the global pass", &global_pass_enabled),

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/**
 * An association between a node and a point in time.
 */
typedef struct _loc_t {
	ir_node *irn;  /**< A node. */
	unsigned time; /**< A use time.
	                    In the global pass this is used
	                    as the version number and not as a time.
	                    Only to save space... */

/* A working set: the values currently assumed to be in registers. */
typedef struct _workset_t {
	int len;       /**< current length */
	loc_t vals[0]; /**< inlined array of the values/distances in this working set */

/* Environment carried through one run of the spiller for one register class. */
typedef struct _belady_env_t {
	const arch_env_t *arch;
	const arch_register_class_t *cls;
	ir_node **blocks;     /**< Array of all blocks. */
	int n_blocks;         /**< Number of blocks in the graph. */
	int n_regs;           /**< number of regs in this reg-class */
	workset_t *ws;        /**< the main workset used while processing a block. ob-allocated */
	ir_node *instr;       /**< current instruction */
	int instr_nr;         /**< current instruction number (relative to block start) */
	spill_env_t *senv;    /**< see bespill.h */
	bitset_t *spilled;    /**< bitset to keep all the irns which have already been spilled. */
	struct list_head bring_in_head; /**< list of bring_in_t records, see below */
/**
 * qsort comparator ordering loc_t entries by ascending use time.
 * NOTE(review): the casts of a/b into p/q are on lines not visible
 * in this fragment; presumably p and q are (const loc_t *) views.
 */
static int loc_compare(const void *a, const void *b)
	return (p->time > q->time) - (p->time < q->time);
/** Dump all (node, time) pairs of workset @p w to stderr (debug aid). */
static INLINE void workset_print(const workset_t *w)
	for(i = 0; i < w->len; ++i) {
		ir_fprintf(stderr, "%+F %d\n", w->vals[i].irn, w->vals[i].time);
/**
 * Alloc a new workset on obstack @p ob with maximum size @p max
 * (capacity is env->n_regs entries; the loc_t array is allocated inline).
 */
static INLINE workset_t *new_workset(belady_env_t *env, struct obstack *ob) {
	size_t size = sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]);
	res = obstack_alloc(ob, size);
	memset(res, 0, size);

/**
 * Alloc a new instance on obstack and make it equal to @param ws
 */
static INLINE workset_t *workset_clone(belady_env_t *env, struct obstack *ob, workset_t *ws) {
	size_t size = sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]);
	res = obstack_alloc(ob, size);
	memcpy(res, ws, size);

/**
 * Do NOT alloc anything. Make @param tgt equal to @param src.
 * returns @param tgt for convenience
 * (both worksets must have been allocated with capacity env->n_regs)
 */
static INLINE workset_t *workset_copy(belady_env_t *env, workset_t *tgt, workset_t *src) {
	size_t size = sizeof(*src) + (env->n_regs)*sizeof(src->vals[0]);
	memcpy(tgt, src, size);

/**
 * Overwrites the current content array of @param ws with the
 * @param count locations given at memory @param locs.
 * Set the length of @param ws to count.
 */
static INLINE void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs) {
	workset->len = count;
	memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
/**
 * Inserts the value @p val into the workset, iff it is not
 * already contained. The workset must not be full.
 * Values of a foreign register class are silently ignored.
 */
static INLINE void workset_insert(belady_env_t *env, workset_t *ws, ir_node *val) {
	/* check for current regclass */
	if (!arch_irn_consider_in_reg_alloc(env->arch, env->cls, val)) {
		// DBG((dbg, DBG_WORKSET, "Skipped %+F\n", val));

	/* check if val is already contained */
	for(i=0; i<ws->len; ++i)
		if (ws->vals[i].irn == val)

	assert(ws->len < env->n_regs && "Workset already full!");
	ws->vals[ws->len++].irn = val;
/**
 * Removes all entries from this workset
 */
static INLINE void workset_clear(workset_t *ws) {

/**
 * Removes the value @p val from the workset if present.
 * The removed slot is filled with the last entry (order is not kept).
 */
static INLINE void workset_remove(workset_t *ws, ir_node *val) {
	for(i=0; i<ws->len; ++i) {
		if (ws->vals[i].irn == val) {
			ws->vals[i] = ws->vals[--ws->len];

/** Returns the index of @p val in @p ws, presumably -1 if not contained. */
static INLINE int workset_get_index(const workset_t *ws, const ir_node *val) {
	for(i=0; i<ws->len; ++i) {
		if (ws->vals[i].irn == val)
/**
 * Iterates over all values in the working set.
 * @p ws The workset to iterate
 * @p v A variable to put the current value in
 * @p i An integer for internal use
 * NOTE(review): the macro's increment part is on a line not visible
 * in this fragment.
 */
#define workset_foreach(ws, v, i) for(i=0; \
	v=(i < ws->len) ? ws->vals[i].irn : NULL, i < ws->len; \
/* Accessors for workset fields. All arguments are parenthesized so the
 * macros stay safe when given compound expressions. */
#define workset_set_time(ws, i, t)     ((ws)->vals[(i)].time = (t))
#define workset_get_time(ws, i)        ((ws)->vals[(i)].time)
#define workset_set_length(ws, length) ((ws)->len = (length))
#define workset_get_length(ws)         ((ws)->len)
#define workset_get_val(ws, i)         ((ws)->vals[(i)].irn)
/* No trailing semicolon here: the semicolon belongs at the call site.
 * (The old definition ended in ';', which breaks `if (...) workset_sort(x);
 * else ...` by forming an empty statement.) */
#define workset_sort(ws)               qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare)
#define workset_contains(ws, n)        (workset_get_index(ws, n) >= 0)
typedef struct _bring_in_t bring_in_t;

/* Per-block bookkeeping produced by the local Belady pass and consumed
 * by the global pass. */
typedef struct _block_info_t {
	workset_t *ws_start, *ws_end; /**< worksets at entry/exit of the block */
	ir_node **first_uses;  /**< first users of a node in entrance_reg.
	                            use its index for here too. */
	ir_node *first_non_in; /**< First node in block which is not a phi. */
	ir_node *last_ins;     /**< The instruction before which end of
	                            block reloads will be inserted. */
	workset_t *entrance_reg; /**< That set will contain all values
	                              transported into the block which
	                              are used before they are displaced.
	                              That means, we later have to care to
	                              bring them into the block in a register
	                              or reload them at the entry of the block. */
	int pressure; /**< The amount of registers which remain free
	                   in this block. This capacity can be used to let
	                   global variables, transported into other blocks,
	                   live through this block. */
	double exec_freq;   /**< The execution frequency of this block. */
	double reload_cost; /**< Cost of a reload in this block. */
	int front_pressure_inc; /**< pressure increase at the block front caused
	                             by values brought in by the global pass */
/**
 * Allocate and zero-initialize the block_info_t for block number @p id,
 * attach it to the block via its link field, and return it.
 */
static INLINE void *new_block_info(belady_env_t *bel, int id)
	ir_node *bl = bel->blocks[id];
	block_info_t *res = obstack_alloc(&bel->ob, sizeof(*res));
	memset(res, 0, sizeof(res[0]));
	res->first_non_in = NULL;
	res->last_ins = NULL;
	res->first_uses = obstack_alloc(&bel->ob, sizeof(res->first_uses[0]) * bel->n_regs);
	res->entrance_reg = new_workset(bel, &bel->ob);
	res->exec_freq = get_block_execfreq(bel->ef, bl);
	/* reload cost is frequency-weighted so hot blocks prefer live-through */
	res->reload_cost = bel->arch->isa->reload_cost * res->exec_freq;
	set_irn_link(bl, res);

/* The block_info_t is stashed in the block's link field. */
#define get_block_info(block)        ((block_info_t *)get_irn_link(block))
#define set_block_info(block, info)  set_irn_link(block, info)

/** Lazily compute and cache the end-of-block insertion point of @p bi. */
static INLINE ir_node *block_info_get_last_ins(block_info_t *bi)
	bi->last_ins = be_get_end_of_block_insertion_point(bi->bl);
/* Linked per-node list of use positions inside one block, built back to
 * front so the head is always the next upcoming use. */
typedef struct _next_use_t {
	unsigned is_first_use : 1; /**< Indicate that this use is the first
	                                in the block. Needed to identify
	                                transport in values for the global */
	sched_timestep_t step; /**< The time step of the use. */
	struct _next_use_t *next; /**< The next use int this block */

/** Phase init callback: per-node data starts out empty. */
static void *next_use_init(ir_phase *phase, ir_node *irn, void *old)

/**
 * Walk the block schedule in reverse and record, for every operand,
 * the chain of its uses (head = earliest use).
 */
static void build_next_uses(block_info_t *bi)
	sched_renumber(bi->bl);

	phase_init(&bi->next_uses, "next uses", bi->bel->irg, PHASE_DEFAULT_GROWTH, next_use_init, NULL);
	sched_foreach_reverse(bi->bl, irn) {
		for (i = get_irn_arity(irn) - 1; i >= 0; --i) {
			ir_node *op = get_irn_n(irn, i);
			next_use_t *curr = phase_get_irn_data(&bi->next_uses, op);
			next_use_t *use = phase_alloc(&bi->next_uses, sizeof(use[0]));
			use->is_first_use = 1;
			use->step = sched_get_time_step(irn);
			/* a use recorded earlier (later in schedule) is no longer first */
			curr->is_first_use = 0;
			assert(curr->step >= use->step);
			phase_set_irn_data(&bi->next_uses, op, use);

/** The next (not yet consumed) use of @p irn inside the current block. */
#define get_current_use(bi, irn) phase_get_irn_data(&(bi)->next_uses, (irn))

/** Pop the head of the use list of @p irn, advancing to its next use. */
static INLINE void advance_current_use(block_info_t *bi, const ir_node *irn)
	next_use_t *use = get_current_use(bi, irn);
	phase_set_irn_data(&bi->next_uses, irn, use->next);
/** qsort comparator: order blocks by descending execution frequency. */
static __attribute__((unused)) int block_freq_gt(const void *a, const void *b)
	const ir_node * const *p = a;
	const ir_node * const *q = b;
	block_info_t *pi = get_block_info(*p);
	block_info_t *qi = get_block_info(*q);
	double diff = qi->exec_freq - pi->exec_freq;
	return (diff > 0) - (diff < 0);

/**
 * qsort comparator: descending execution frequency, with DFS post-order
 * as the tie breaker (and as the primary key when both blocks are on the
 * same side of frequency 1.0 with equal frequency).
 */
static int block_freq_dfs_gt(const void *a, const void *b)
	const ir_node * const *p = a;
	const ir_node * const *q = b;
	block_info_t *pi = get_block_info(*p);
	block_info_t *qi = get_block_info(*q);
	if ((pi->exec_freq > 1.0 && qi->exec_freq > 1.0)
			|| (pi->exec_freq <= 1.0 && qi->exec_freq <= 1.0)) {
		if (pi->exec_freq == qi->exec_freq) {
			const dfs_t *dfs = pi->bel->dfs;
			int pp = dfs_get_post_num(dfs, pi->bl);
			int pq = dfs_get_post_num(dfs, qi->bl);
	diff = qi->exec_freq - pi->exec_freq;
	return (diff > 0) - (diff < 0);
	ir_node *irn;          /**< The node to bring in. */
	const block_info_t *bi; /**< The block to which bring in should happen. */
	int pressure_so_far;   /**< The maximal pressure till the first use of irn in bl. */
	ir_node *first_use;    /**< The first user of irn in bl. */
	sched_timestep_t use_step; /**< Schedule step of the first use. */
	int is_remat : 1;      /**< Is rematerializable. */
	struct list_head list; /**< link in belady_env_t::bring_in_head */
443 static INLINE bring_in_t *new_bring_in(const block_info_t *bi, ir_node *irn, const next_use_t *use)
445 belady_env_t *env = bi->bel;
446 bring_in_t *bri = obstack_alloc(&env->ob, sizeof(bi[0]));
450 bri->first_use = use->irn;
451 bri->use_step = use->step;
452 bri->is_remat = be_is_rematerializable(bi->bel->senv, irn, use->irn);
453 bri->pressure_so_far = bi->pressure;
454 INIT_LIST_HEAD(&bri->list);
455 list_add_tail(&bri->list, &env->bring_in_head);
456 env->n_bring_in += 1;
/**
 * qsort comparator for bring_in_t* entries, defining the order in which
 * the global pass processes them: non-remat before remat, then by use
 * step within a block, then by descending block frequency, with the
 * definition-block frequency and reverse DFS post-order as tie breakers.
 */
static int bring_in_cmp(const void *a, const void *b)
	const bring_in_t *p = *(const bring_in_t * const *) a;
	const bring_in_t *q = *(const bring_in_t * const *) b;

	/* if one of both is a remat node, it will be done after the other. */
	if (p->is_remat != q->is_remat)
		return p->is_remat - q->is_remat;

	/* in the same block, the one further in the front has to be processed first!
	 * Otherwise the front_pressure 'trick' is not exact. */
	return p->use_step - q->use_step;

	fp = p->bi->exec_freq;
	fq = q->bi->exec_freq;

	/* if both have the same frequency, inspect the frequency of the definition */
	double fdp = get_block_info(get_nodes_block(p->irn))->exec_freq;
	double fdq = get_block_info(get_nodes_block(q->irn))->exec_freq;

	/* if the defs of both have the same freq, we go for reverse dfs post order. */
	const dfs_t *dfs = p->bi->bel->dfs;
	int pp = dfs_get_post_num(dfs, p->bi->bl);
	int pq = dfs_get_post_num(dfs, q->bi->bl);

	return (fdq > fdp) - (fdq < fdp);

	return (fq > fp) - (fq < fp);
/**
 * Compute the next-use distance of @p irn relative to the current
 * instruction, applying the remat extension and the already-spilled
 * penalty. Values without a further use yield LIVE_END or DEAD.
 */
static INLINE unsigned get_curr_distance(block_info_t *bi, const ir_node *irn, int is_usage)
	belady_env_t *env = bi->bel;
	sched_timestep_t curr_step = sched_get_time_step(env->instr);
	next_use_t *use = get_current_use(bi, irn);
	int flags = arch_irn_get_flags(env->arch, irn);

	assert(!(flags & arch_irn_flags_ignore));

	/* We have to keep nonspillable nodes in the workingset */
	if(flags & arch_irn_flags_dont_spill)

	if (!is_usage && use && use->step == curr_step)

	unsigned res = use->step - curr_step;
	assert(use->step >= curr_step);

	/* rematerializable values get pushed far away; spilled ones are
	 * penalized so they are chosen again for eviction */
	if (remat_live_range_ext && be_is_rematerializable(env->senv, irn, use->irn))
	else if (bitset_contains_irn(env->spilled, irn))
		res *= already_spilled_factor;

	return be_is_live_end(env->lv, bi->bl, irn) ? LIVE_END : DEAD;
/** Is @p irn a Phi belonging to block @p bl? */
static INLINE int is_local_phi(const ir_node *bl, const ir_node *irn)
	return is_Phi(irn) && get_nodes_block(irn) == bl;

/**
 * Check, if the value is something that is transported into a block.
 * That is, the value is defined elsewhere or defined by a Phi in the block.
 * @param env The belady environment.
 * @param bl The block in question.
 * @param irn The node in question.
 * @return 1, if node is something transported into @p bl, 0 if not.
 * @note The function will only give correct answers in the case
 *       where @p irn is used in the block @p bl which is always
 *       the case in our usage scenario.
 */
static INLINE int is_transport_in(const ir_node *bl, const ir_node *irn)
	return get_nodes_block(irn) != bl || is_Phi(irn);
/**
 * Performs the actions necessary to grant the request that:
 * - new_vals can be held in registers
 * - as few as possible other values are disposed
 * - the worst values get disposed
 *
 * @p is_usage indicates that the values in new_vals are used (not defined)
 * In this case reloads must be performed
 */
static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) {
	belady_env_t *env = bi->bel;
	workset_t *ws = env->ws;
	ir_node **to_insert = alloca(env->n_regs * sizeof(to_insert[0]));
	int i, len, max_allowed, demand, iter;

	/*
	 * 1. Identify the number of needed slots and the values to reload
	 */
	workset_foreach(new_vals, val, iter) {
		/* mark value as used */
		if (! workset_contains(ws, val)) {
			DBG((dbg, DBG_DECIDE, "\t\tinsert %+F\n", val));
			to_insert[demand++] = val;
			int insert_reload = 1;
			next_use_t *use = get_current_use(bi, val);

			/*
			 * if we use a value which is transported in this block, i.e. a
			 * phi defined here or a live in, for the first time, we check
			 * if there is room for that guy to survive from the block's
			 * entrance to here or not.
			 */
			assert(sched_get_time_step(env->instr) == (int) use->step);
			if (is_transport_in(bi->bl, val) && use->is_first_use) {
				bring_in_t *bri = new_bring_in(bi, val, use);
				DBG((dbg, DBG_DECIDE, "\t\tbring in node %+F, pressure %d:\n", val, bi->pressure));
				bri->first_use = env->instr;
				DBG((dbg, DBG_DECIDE, "\t\tno reload. must be considered at block start\n"));

			/* otherwise reload the value right here */
			// if (insert_reload) {
			bitset_add_irn(env->spilled, val);
			DBG((dbg, DBG_SPILL, "\t\tReload %+F before %+F\n", val, env->instr));
			be_add_reload(env->senv, val, env->instr, env->cls, 1);

		assert(is_usage || "Defined value already in workset?!?");
		DBG((dbg, DBG_DECIDE, "\t\tskip %+F\n", val));

	DBG((dbg, DBG_DECIDE, "\t\tdemand = %d\n", demand));

	/*
	 * 2. Make room for at least 'demand' slots
	 */
	len = workset_get_length(ws);
	max_allowed = env->n_regs - demand;

	/* Only make more free room if we do not have enough */
	if (len > max_allowed) {
		DBG((dbg, DBG_DECIDE, "\t\tdisposing %d values\n", len - max_allowed));

		/* get current next-use distance */
		for (i = 0; i < ws->len; ++i) {
			ir_node *val = workset_get_val(ws, i);
			unsigned dist = get_curr_distance(bi, val, is_usage);
			workset_set_time(ws, i, dist);

		/* sort entries by increasing nextuse-distance*/

		/* kill the last 'demand' entries in the array */
		workset_set_length(ws, max_allowed);

	/*
	 * 3. Insert the new values into the workset
	 *    Also, we update the pressure in the block info.
	 *    That is important for the global pass to decide
	 *    how many values can live through the block.
	 */
	for (i = 0; i < demand; ++i)
		workset_insert(env, env->ws, to_insert[i]);

	bi->pressure = MAX(bi->pressure, workset_get_length(env->ws));
/**
 * For the given block @p block, decide for each values
 * whether it is used from a register or is reloaded
 * (the classic local Belady pass over one block's schedule).
 */
static void belady(belady_env_t *env, int id) {
	block_info_t *block_info = new_block_info(env, id);
	const ir_node *block = block_info->bl;

	DBG((dbg, DBG_WSETS, "Belady on %+F\n", block_info->bl));
	new_vals = new_workset(env, &env->ob);
	workset_clear(env->ws);

	/* build the next use information for this block. */
	build_next_uses(block_info);

	block_info->first_non_in = NULL;

	/* process the block from start to end */
	sched_foreach(block, irn) {
		assert(workset_get_length(env->ws) <= env->n_regs && "Too much values in workset!");

		/* projs are handled with the tuple value.
		 * Phis are no real instr (see insert_starters())
		 * instr_nr does not increase */
		if (is_Proj(irn) || is_Phi(irn)) {
			// DBG((dbg, DBG_DECIDE, "  ...%+F skipped\n", irn));
		DBG((dbg, DBG_DECIDE, "\t%+F\n", irn));

		if (!block_info->first_non_in)
			block_info->first_non_in = irn;

		/* set instruction in the workset */

		/* allocate all values _used_ by this instruction */
		workset_clear(new_vals);
		for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
			workset_insert(env, new_vals, get_irn_n(irn, i));
		DBG((dbg, DBG_DECIDE, "\t* uses\n"));
		displace(block_info, new_vals, 1);

		/*
		 * set all used variables to the next use in their next_use_t list
		 * Also, kill all dead variables from the workset. They are only
		 * augmenting the pressure. Note, that a variable is dead
		 * if it has no further use in this block and is *not* live end
		 */
		for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
			ir_node *op = get_irn_n(irn, i);
			next_use_t *use = get_current_use(block_info, op);

			if (!use->next && !be_is_live_end(env->lv, block, op))
				workset_remove(env->ws, op);

			advance_current_use(block_info, op);

		/* allocate all values _defined_ by this instruction */
		workset_clear(new_vals);
		if (get_irn_mode(irn) == mode_T) { /* special handling for tuples and projs */
			const ir_edge_t *edge;

			foreach_out_edge(irn, edge) {
				ir_node *proj = get_edge_src_irn(edge);
				workset_insert(env, new_vals, proj);

		workset_insert(env, new_vals, irn);

		DBG((dbg, DBG_DECIDE, "\t* defs\n"));
		displace(block_info, new_vals, 0);

	phase_free(&block_info->next_uses);

	/* Remember end-workset for this block */
	block_info->ws_end = workset_clone(env, &env->ob, env->ws);
	DBG((dbg, DBG_WSETS, "End workset for %+F:\n", block));
	workset_foreach(block_info->ws_end, irn, iter)
		DBG((dbg, DBG_WSETS, "  %+F (%u)\n", irn, workset_get_time(block_info->ws_end, iter)));
	DBG((dbg, DBG_WSETS, "Max pressure in block: %d\n", block_info->pressure));
/*
	 |_ _| |__   ___  __ _| | ___ | |__   __ _| | |  _ \ __ _ _ __| |_
	  | | | '_ \ / _ \/ _` | |/ _ \| '_ \ / _` | | | |_) / _` | '__| __|
	  | | | | | |  __/ (_| | | (_) | |_) | (_| | | |  __/ (_| | |  | |_
	 |_| |_| |_|\___| \__, |_|\___/|_.__/ \__,_|_| |_|   \__,_|_|   \__|
*/

/* In the global pass loc_t::time is reused as a version stamp. */
#define workset_set_version(ws, i, t) ((ws)->vals[(i)].time = (t))
#define workset_get_version(ws, i) ((ws)->vals[(i)].time)

/* Version arithmetic: older versions mean "fixed by an earlier decision". */
#define ver_oldest (0)
#define ver_youngest ((unsigned) -1)
#define ver_make_newer(v) ((v) + 1)
#define ver_is_older(v, w) ((v) < (w))
#define ver_is_younger(v, w) ((v) > (w))

/* Tentative per-block state stacked during one optimize_variable() trial. */
typedef struct _block_state_t {
	struct _block_state_t *next;
	struct _block_state_t *next_intern;
	unsigned front_pressure_inc;
	workset_t *end_state;

/* A pending action (reload / live-through) decided for one node. */
typedef struct _irn_action_t {
	struct _irn_action_t *next;

/* Shared state of the whole global pass. */
typedef struct _global_end_state_t {
	unsigned *bs_tops_vers;
	block_state_t **bs_tops;
	block_state_t *bs_top;
	irn_action_t *ia_top;
} global_end_state_t;
	block_state_t *bs_top; /**< saved top of the block-state stack */
	irn_action_t *ia_top;  /**< saved top of the action stack */

/** Return the current (this-version) block state of @p bi, or NULL. */
static INLINE block_state_t *get_block_state(global_end_state_t *ges, block_info_t *bi)
	assert(!ver_is_younger(ges->bs_tops_vers[id], ges->version));
	return ver_is_older(ges->bs_tops_vers[id], ges->version) ? NULL : ges->bs_tops[bi->id];

/** End workset of @p bi: the tentative one if present, else the committed one. */
static INLINE const workset_t *get_end_state(global_end_state_t *ges, block_info_t *bi)
	block_state_t *bs = get_block_state(ges, bi);
	return bs ? bs->end_state : bi->ws_end;

/**
 * Push a fresh tentative block state for @p bi, cloned from the existing
 * tentative state if there is one, else from the committed block info.
 */
static block_state_t *new_block_state(global_end_state_t *ges, block_info_t *bi)
	block_state_t *bs = get_block_state(ges, bi);
	block_state_t *nw = obstack_alloc(&ges->obst, sizeof(nw[0]));

	nw->next_intern = bs;
	nw->next = ges->bs_top;

	nw->pressure = bs->pressure;
	nw->front_pressure_inc = bs->front_pressure_inc;
	nw->end_state = workset_clone(ges->env, &ges->obst, bs->end_state);

	nw->pressure = bi->pressure;
	nw->front_pressure_inc = bi->front_pressure_inc;
	nw->end_state = workset_clone(ges->env, &ges->obst, bi->ws_end);

	ges->bs_tops[bi->id] = nw;
	ges->bs_tops_vers[bi->id] = ges->version;

/** Push a new (initially no-op) action record for @p irn in block @p bl. */
static irn_action_t *new_irn_action(global_end_state_t *ges, ir_node *irn, const ir_node *bl)
	irn_action_t *ia = obstack_alloc(&ges->obst, sizeof(ia[0]));

	ia->act = irn_act_none;
	ia->next = ges->ia_top;

/** Snapshot the transaction state so it can be rolled back later. */
static INLINE rollback_info_t trans_begin(global_end_state_t *ges)
	rb.obst_level = obstack_base(&ges->obst);
	rb.bs_top = ges->bs_top;
	rb.ia_top = ges->ia_top;

/** Undo everything pushed since the matching trans_begin(). */
static INLINE void trans_rollback(global_end_state_t *ges, rollback_info_t *rb)
	/* unwind all the stacks indiced with the block number */
	for (bs = ges->bs_top; bs != rb->bs_top; bs = bs->next) {
		unsigned id = bs->bi->id;
		ges->bs_tops[id] = bs->next_intern;

	ges->ia_top = rb->ia_top;
	ges->bs_top = rb->bs_top;
	obstack_free(&ges->obst, rb->obst_level);

static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level);
/**
 * Estimate the cost of making @p irn available in a register at the end
 * of block @p bl, trying both "reload at block end" and "live through",
 * and record the chosen action tentatively. Returns the cost, or a
 * value >= @p limit / HUGE_VAL when it cannot be done cheaply enough.
 */
static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level)
	block_info_t *bi = get_block_info(bl);
	const workset_t *end = get_end_state(ges, bi);

	DBG((dbg, DBG_GLOBAL, "\t%2Dcan make avail %+F at end of %+F\n", level, irn, bl));

	/*
	 * to make the value available at end,
	 * we have several cases here.
	 *
	 * - we already visited that block.
	 * - If the value is in the final end set, return 0.
	 *   somebody else already allocated it there.
	 * - If not and the final end set is already full,
	 *   we cannot make the value available at the end
	 *   of this block. return INFINITY.
	 * - Else (value not in final end set and there is room):
	 *   1) The value is in a register at the end of the local Belady pass.
	 *      Allocate a slot in the final end set and return 0.
	 *   2) The value is not in the Belady end set:
	 *      If the block's capacity is < k then check what it costs
	 *      to transport the value from upper blocks to this block.
	 *      Compare that against the reload cost in this block. If
	 *      cheaper, do the other thing. If not, reload it here.
	 */

	/* if the end set contains it already, it is in a reg and it costs nothing
	 * to load it to one. */
	index = workset_get_index(end, irn);
	unsigned ver = workset_get_version(end, index);
	DBG((dbg, DBG_GLOBAL, "\t%2Dnode is in the end set and is %s fixed\n",
				level, ver_is_older(ver, ges->version) ? "already" : "not yet"));

	/*
	 * if the version is older, the value is already fixed
	 * and cannot be removed from the end set.
	 *
	 * If not, we will create a new block state for that block since
	 * we modify it by giving the end state a new version.
	 */
	if (ver_is_younger(ver, ges->version)) {
		block_state_t *bs = new_block_state(ges, bi);
		workset_set_version(bs->end_state, index, ges->version);

	/*
	 * Now we have two options:
	 * 1) Reload the value at the end of the block.
	 *    Therefore, perhaps, we have to erase another one from the workset.
	 *    This may only be done if it has not been fixed.
	 *    Since fixed means that a previous pass has decided that that value
	 *    *has* to stay in the end set.
	 * 2) we can try, if the capacity of the block allows it, to let
	 *    the value live through the block and make it available at
	 *
	 * First, we test the local (reload in this block) alternative
	 * and compare against the other alternative.
	 * Of course, we chose the cheaper one.
	 */

	int n_regs = bi->bel->n_regs;
	int len = workset_get_length(end);

	/*
	 * look if there is room in the end array
	 * for the variable. Note that this does not
	 * mean that the var can live through the block.
	 * There is just room at the *end*
	 */
	DBG((dbg, DBG_GLOBAL, "\t%2Dthe end set has %d free slots\n", level, n_regs - len));

	for (i = 0; i < len; ++i) {
		unsigned ver = workset_get_version(end, i);
		if (ver_is_younger(ver, ges->version))

	DBG((dbg, DBG_GLOBAL, "\t%2D%+F (slot %d) can be erased from the end set\n",
				level, end->vals[i].irn, i));

	/*
	 * finally there is some room. we can at least reload the value.
	 * but we will try to let it live through anyhow.
	 */
	irn_action_t *vs = new_irn_action(ges, irn, bi->bl);
	block_state_t *bs = new_block_state(ges, bi);
	workset_t *end = bs->end_state;
	ir_node *ins_before = block_info_get_last_ins(bi);
	double reload_here = be_get_reload_costs(bi->bel->senv, irn, ins_before);
	int pressure_ok = bs->pressure < (unsigned) n_regs;

	if (reload_here < bi->reload_cost)

	/*
	 * No matter what we do, the value will be in the end set
	 * if the block from now on (of course only regarding the
	 * current state). Enter it and set the new length
	 */
	end->vals[slot].irn = irn;
	workset_set_version(end, slot, ges->version);
	workset_set_length(end, MAX(workset_get_length(end), slot + 1));

	vs->act = irn_act_reload;

	DBG((dbg, DBG_GLOBAL, "\t%2Dthere is a free slot. capacity=%d, reload here=%f, pressure %s\n",
				level, n_regs - bs->pressure, reload_here, pressure_ok ? "ok" : "insufficient"));

	/* look if we can bring the value in. */
	if (pressure_ok && reload_here > 0.0) {
		rollback_info_t rb = trans_begin(ges);
		double new_limit = MIN(reload_here, limit);

		vs->act = irn_act_live_through;
		res = can_bring_in(ges, bl, irn, new_limit, level + 1);

		/*
		 * if bring in is too expensive re-adjust the pressure
		 * and roll back the state
		 */
		if (res >= reload_here) {
			vs->act = irn_act_reload;
			trans_rollback(ges, &rb);

	DBG((dbg, DBG_GLOBAL, "\t%2D%s\n", level,
				vs->act == irn_act_reload ? "reloading" : "bringing in"));

	DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, res));
/**
 * Estimate the cost of transporting @p irn into block @p bl in a register
 * by recursively making it (or the corresponding phi operand) available
 * at the end of every predecessor. Returns HUGE_VAL and rolls back when
 * the summed cost reaches @p limit.
 */
static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level)
	belady_env_t *env = ges->env;
	double glob_costs = HUGE_VAL;

	DBG((dbg, DBG_GLOBAL, "\t%2Dcan bring in (max %f) for %+F at block %+F\n", level, limit, irn, bl));

	if (is_transport_in(bl, irn)) {
		int i, n = get_irn_arity(bl);
		ir_node **nodes = alloca(get_irn_arity(bl) * sizeof(nodes[0]));
		rollback_info_t rb = trans_begin(ges);

		for (i = 0; i < n; ++i) {
			ir_node *pr = get_Block_cfgpred_block(bl, i);
			ir_node *op = is_local_phi(bl, irn) ? get_irn_n(irn, i) : irn;

			/*
			 * there might be unknowns as operands of phis in that case
			 * we set the costs to zero, since they won't get spilled.
			 */
			if (arch_irn_consider_in_reg_alloc(env->arch, env->cls, op))
				c = can_make_available_at_end(ges, pr, op, limit - glob_costs, level + 1);

			if (glob_costs >= limit) {
				glob_costs = HUGE_VAL;
				trans_rollback(ges, &rb);

	DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, glob_costs));
/**
 * Make the tentative decisions permanent: perform all recorded irn
 * actions (keep live-through phis, add end-of-block reloads) and copy
 * each touched tentative block state back into its block_info_t.
 */
static void materialize_and_commit_end_state(global_end_state_t *ges)
	belady_env_t *env = ges->env;

	DBG((dbg, DBG_GLOBAL, "\tmaterializing\n"));

	/*
	 * Perform all the variable actions.
	 */
	for (ia = ges->ia_top; ia != NULL; ia = ia->next) {
		case irn_act_live_through:
			if (is_local_phi(ia->bl, ia->irn)) {
				bitset_add_irn(ges->succ_phis, ia->irn);
				DBG((dbg, DBG_GLOBAL, "\t\tlive through phi kept alive: %+F\n", ia->irn));

		case irn_act_reload:
			be_add_reload_at_end(env->senv, ia->irn, ia->bl, env->cls, 1);
			DBG((dbg, DBG_GLOBAL, "\t\tadding reload of %+F at end of %+F\n", ia->irn, ia->bl));

			DBG((dbg, DBG_GLOBAL, "\t\t%+F is in the end set of %+F\n", ia->irn, ia->bl));

	/*
	 * Commit the block end states
	 */
	for (bs = ges->bs_top; bs != NULL; bs = bs->next) {
		block_info_t *bi = bs->bi;

		/* commit each block only once: the first (topmost) state wins */
		if (!bitset_is_set(ges->committed, bi->id)) {
			DBG((dbg, DBG_GLOBAL, "\t\tcommiting workset of %+F with version %x\n", bi->bl, ges->version));
			// bes->bs->end_state->vals[idx].version = ges->version;
			workset_copy(env, bi->ws_end, bs->end_state);
			DBG((dbg, DBG_GLOBAL, "\t\told pressure: %d, new pressure: %d, end length: %d\n",
						bi->pressure, bs->pressure, workset_get_length(bs->end_state)));
			bi->pressure = bs->pressure;
			bi->front_pressure_inc = bs->front_pressure_inc;
			bitset_set(ges->committed, bi->id);

	/* clear the committed bitset. the next call is expecting it. */
	bitset_clear_all(ges->committed);
/**
 * Try to bring a variable into a block.
 * @param ges   The state of all end sets.
 * @param br    The bring-in request (block + variable + first use).
 */
static void optimize_variable(global_end_state_t *ges, bring_in_t *br)
	ir_node *irn = br->irn;
	ir_node *bl = br->bi->bl;
	block_info_t *bi = get_block_info(bl);
	belady_env_t *env = ges->env;
	void *reset_level = obstack_base(&ges->obst);
	int front_pressure = bi->front_pressure_inc + br->pressure_so_far;

	assert(bi->front_pressure_inc <= env->n_regs);

	DBG((dbg, DBG_GLOBAL, "fixing %+F at %+F (%f), front pr inc: %d, pr so far: %d, first use: %u\n",
			irn, bl, bi->exec_freq, bi->front_pressure_inc, br->pressure_so_far, br->first_use));

	/* no room at the front of the block: a local reload is the only option */
	if (front_pressure >= env->n_regs) {
		DBG((dbg, DBG_GLOBAL, "no front capacity left. reload here\n"));
		be_add_reload(env->senv, irn, br->first_use, env->cls, 1);

	double bring_in_costs;
	rollback_info_t trans;

	/* process all variables which shall be in a reg at
	 * the beginning of the block in the order of the next use. */
	local_costs = be_get_reload_costs(env->senv, irn, br->first_use);

	/* reset the lists */

	/* if the variable will live into this block, we must adapt the pressure.
	 * The new pressure is the MAX of:
	 * 1) the total block pressure
	 * 2) the pressure so far + the front pressure increase + 1
	 *
	 * If the second is larger than the first,
	 * we have to increment the total block pressure and hence
	 * save the old pressure to restore it in case of failing to
	 * bring the variable into the block in a register.
	 */
	trans = trans_begin(ges);
	bs = new_block_state(ges, bi);
	bs->front_pressure_inc += 1;
	bs->pressure = MAX(bs->pressure, bs->front_pressure_inc);

	assert(bi->pressure <= env->n_regs);
	DBG((dbg, DBG_GLOBAL, "\ttrans in var %+F, version %x\n", irn, ges->version));
	bring_in_costs = can_bring_in(ges, bl, irn, local_costs, 1);
	DBG((dbg, DBG_GLOBAL, "\tbring in: %f, local: %f", bring_in_costs, local_costs));

	/*
	 * we were not able to let the value arrive
	 * in a register at the entrance of the block
	 * or it is too costly, so we have to do the reload locally.
	 * Also revert the register pressure.
	 */
	if (bring_in_costs >= local_costs) {
		DBG((dbg, DBG_GLOBAL, " -> do local reload before %+F in %+F\n", br->first_use, bl));
		be_add_reload(env->senv, irn, br->first_use, env->cls, 1);
		trans_rollback(ges, &trans);

	/*
	 * if the transport-in was a phi (that is actually used in block)
	 * mark it in the succ_phis set to *not* phi spill it.
	 */
	if (is_local_phi(bl, irn))
		bitset_add_irn(ges->succ_phis, irn);

	DBG((dbg, DBG_GLOBAL, " -> bring it in\n"));
	materialize_and_commit_end_state(ges);

	DBG((dbg, DBG_GLOBAL, "\n"));

	/* reset the obstack and create a new version. */
	obstack_free(&ges->obst, reset_level);
	ges->version = ver_make_newer(ges->version);
1232 struct list_head list;
1235 } node_block_pair_t;
/*
 * Allocate a fresh (node, block) pair on the global-end-state obstack and
 * initialize its list head.
 * NOTE(review): the assignments of bl/irn into p and the return statement
 * are not visible in this excerpt — confirm against the full file.
 */
1237 static INLINE node_block_pair_t *new_node_block_pair(global_end_state_t *ges, ir_node *bl, ir_node *irn)
1239 node_block_pair_t *p = obstack_alloc(&ges->obst, sizeof(p[0]));
1240 INIT_LIST_HEAD(&p->list);
/*
 * Sort the bring-in records into the order in which the global pass shall
 * process them: copy the list into a temporary array, qsort it with
 * bring_in_cmp, then rebuild env->bring_in_head in sorted order.
 * NOTE(review): local declarations and the per-element array fill appear
 * elided in this excerpt; the sortarr free is presumably below — confirm.
 */
1246 static void determine_global_order(belady_env_t *env)
1248 bring_in_t **sortarr = xmalloc(env->n_bring_in * sizeof(sortarr[0]));
1252 list_for_each_entry(bring_in_t, elm, &env->bring_in_head, list)
1255 qsort(sortarr, env->n_bring_in, sizeof(sortarr[0]), bring_in_cmp);
/* relink the list nodes in sorted order */
1257 INIT_LIST_HEAD(&env->bring_in_head);
1258 for (i = 0, n = env->n_bring_in; i < n; ++i) {
1259 INIT_LIST_HEAD(&sortarr[i]->list);
1260 list_add_tail(&sortarr[i]->list, &env->bring_in_head);
/*
 * The global pass: after the per-block belady runs, try to keep variables in
 * registers across block boundaries using the blocks' remaining capacity,
 * then spill every phi that could not be kept because its operands arrive
 * via reloads at the block entrances.
 */
1266 static void global_assign(belady_env_t *env)
1268 global_end_state_t ges;
1273 * sort the blocks according to execution frequency.
1274 * That's not necessary for belady() but for the global pass later on.
1276 qsort(env->blocks, env->n_blocks, sizeof(env->blocks[0]), block_freq_dfs_gt);
/* set up the shared end-state: obstack, version counter, phi set,
 * per-block state stacks and their version stamps */
1278 memset(&ges, 0, sizeof(ges));
1279 obstack_init(&ges.obst);
1281 ges.version = ver_make_newer(ver_oldest);
1282 ges.succ_phis = bitset_irg_obstack_alloc(&ges.obst, env->irg);
1283 ges.committed = bitset_obstack_alloc(&ges.obst, env->n_blocks);
1284 ges.bs_tops = obstack_alloc(&ges.obst, sizeof(ges.bs_tops[0]) * env->n_blocks);
1285 ges.bs_tops_vers = obstack_alloc(&ges.obst, sizeof(ges.bs_tops_vers[0]) * env->n_blocks);
1287 /* invalidate all state stack pointer versions */
1288 for (i = 0; i < env->n_blocks; ++i) {
1289 block_info_t *bi = get_block_info(env->blocks[i]);
1290 ges.bs_tops_vers[i] = ver_oldest;
1292 /* Set all block end sets entries to the youngest version */
1293 for (j = workset_get_length(bi->ws_end) - 1; j >= 0; --j)
1294 workset_set_version(bi->ws_end, j, ver_youngest);
1297 /* Determine order for processing the global variables */
1298 determine_global_order(env);
/* try to bring each candidate into its block in a register */
1301 list_for_each_entry(bring_in_t, br, &env->bring_in_head, list)
1302 optimize_variable(&ges, br);
1305 * Now we spill phis which cannot be kept since they were replaced
1306 * by reloads at the block entrances.
1308 for (i = 0; i < env->n_blocks; ++i) {
1309 ir_node *bl = env->blocks[i];
1312 sched_foreach(bl, irn) {
/* only phis of the spilled class that were NOT marked as kept above */
1316 if (arch_irn_consider_in_reg_alloc(env->arch, env->cls, irn)
1317 && !bitset_contains_irn(ges.succ_phis, irn))
1318 be_spill_phi(env->senv, irn);
/*
 * Block-walker callback: record each block in the environment's obstack so
 * it can later be finished into the env->blocks array.
 * NOTE(review): a per-block info setup line appears elided here — confirm
 * against the full file.
 */
1323 static void collect_blocks(ir_node *bl, void *data)
1325 belady_env_t *env = data;
1327 obstack_ptr_grow(&env->ob, bl);
1331 * Do spilling for a register class on a graph using the belady heuristic.
1332 * In the transformed graph, the register pressure never exceeds the number
1333 * of available registers.
1335 * @param birg The backend graph
1336 * @param cls The register class to spill
/*
 * Entry point of the belady2 spiller: set up the environment, run the local
 * belady pass on every block, then the global pass, and finally materialize
 * all spill/reload nodes in the graph.
 * NOTE(review): some declarations (env, n_regs, i) and per-block calls are
 * elided in this excerpt — confirm against the full file.
 */
1338 void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls)
1340 ir_graph *irg = be_get_birg_irg(birg);
1344 /* some special classes contain only ignore regs, nothing to do then */
1345 n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
/* clear the irn links; they are used as per-node scratch storage below */
1349 be_clear_links(irg);
1351 /* init belady env */
1352 obstack_init(&env.ob);
1354 env.arch = be_get_birg_arch_env(birg);
1356 env.lv = be_get_birg_liveness(birg);
1357 env.dfs = env.lv->dfs;
1358 env.n_regs = n_regs;
1359 env.ws = new_workset(&env, &env.ob);
1360 env.senv = be_new_spill_env(birg);
1361 env.ef = be_get_birg_exec_freq(birg);
1362 env.spilled = bitset_irg_obstack_alloc(&env.ob, irg);
1365 INIT_LIST_HEAD(&env.bring_in_head);
/* gather all blocks into a NULL-terminated array on the obstack */
1367 irg_block_walk_graph(irg, NULL, collect_blocks, &env);
1368 obstack_ptr_grow(&env.ob, NULL);
1369 env.blocks = obstack_finish(&env.ob);
1371 /* Fix high register pressure in blocks with belady algorithm */
1372 for (i = 0; i < env.n_blocks; ++i)
/* global pass: keep variables in registers across block borders */
1375 global_assign(&env);
1377 /* Insert spill/reload nodes into the graph and fix usages */
1378 be_insert_spills_reloads(env.senv);
/* clean up */
1381 be_delete_spill_env(env.senv);
1383 obstack_free(&env.ob, NULL);
/*
 * Module constructor: register the belady2 spiller and its command-line
 * options under be.spill.belady2, and set up the debug channel.
 */
1386 void be_init_spillbelady2(void)
1388 lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
1389 lc_opt_entry_t *spill_grp = lc_opt_get_grp(be_grp, "spill");
1390 lc_opt_entry_t *bel2_grp = lc_opt_get_grp(spill_grp, "belady2");
/* static so the address stays valid after this function returns */
1392 static be_spiller_t belady_spiller = {
1396 lc_opt_add_table(bel2_grp, options);
1397 be_register_spiller("belady2", &belady_spiller);
1398 FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady2");
1401 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady2);