2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Belady's spill algorithm, version 2.
23 * @author Sebastian Hack, Matthias Braun, Daniel Grund
27 * The main differences to the original Belady are:
28 * - The workset is empty at the start of a block
29 * There is no attempt to fill it with variables which
30 * are not used in the block.
31 * - There is a global pass which tries to use the remaining
32 * capacity of the blocks to let global variables live through
41 #include "irnodeset.h"
43 #include "irprintf_t.h"
49 #include "iredges_t.h"
50 #include "irphase_t.h"
62 #include "bechordal_t.h"
64 #include "beloopana.h"
67 #include "bespillutil.h"
70 #include "lc_opts_enum.h"
79 #define DBG_WORKSET 128
80 #define DBG_GLOBAL 256
82 #define ALREADY_SPILLED_FACTOR 2
85 #define LIVE_END (DEAD-1)
86 #define REMAT_DIST (DEAD-2)
88 static int already_spilled_factor = 2;
89 static int remat_live_range_ext = 1;
90 static int global_pass_enabled = 1;
92 static const lc_opt_table_entry_t options[] = {
93 LC_OPT_ENT_INT ("asf", "already spilled factor", &already_spilled_factor),
94 LC_OPT_ENT_BOOL ("remat", "rematerializable ops get infinite long live ranges", &remat_live_range_ext),
95 LC_OPT_ENT_BOOL ("global", "enable/disable the global pass", &global_pass_enabled),
99 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
102 * An association between a node and a point in time.
104 typedef struct _loc_t {
105 ir_node *irn; /**< A node. */
106 unsigned time; /**< A use time.
107 In the global pass this is used
108 as the version number and not as a time.
109 Only to save space...
113 typedef struct _workset_t {
114 int len; /**< current length */
115 loc_t vals[0]; /**< inlined array of the values/distances in this working set */
118 typedef struct _belady_env_t {
122 const arch_env_t *arch;
123 const arch_register_class_t *cls;
127 ir_node **blocks; /**< Array of all blocks. */
128 int n_blocks; /**< Number of blocks in the graph. */
129 int n_regs; /**< number of regs in this reg-class */
130 workset_t *ws; /**< the main workset used while processing a block. ob-allocated */
131 ir_node *instr; /**< current instruction */
132 int instr_nr; /**< current instruction number (relative to block start) */
134 spill_env_t *senv; /**< see bespill.h */
135 bitset_t *spilled; /**< bitset to keep all the irns which have already been spilled. */
136 ir_nodeset_t *extra_spilled; /** All nodes for which a special spill location has been computed. */
/* qsort comparator for loc_t entries: ascending by use time.
 * NOTE(review): declarations of p/q are on lines elided from this view. */
140 static int loc_compare(const void *a, const void *b)
144 	return (p->time > q->time) - (p->time < q->time);
/* Debug helper: dump every (node, time) pair of the workset to stderr. */
147 static inline void workset_print(const workset_t *w)
151 	for(i = 0; i < w->len; ++i) {
152 		ir_fprintf(stderr, "%+F %d\n", w->vals[i].irn, w->vals[i].time);
157 * Alloc a new workset on obstack @p ob with maximum size @p max
/* Allocate a zero-initialized workset with room for env->n_regs locations
 * on obstack @p ob (flexible-array allocation via OALLOCFZ). */
159 static inline workset_t *new_workset(belady_env_t *env, struct obstack *ob) {
160 	return OALLOCFZ(ob, workset_t, vals, env->n_regs);
164 * Alloc a new instance on obstack and make it equal to @param ws
/* Allocate a new workset on @p ob and copy header plus all n_regs slots
 * from @p ws into it (full-capacity copy, not just ws->len entries). */
166 static inline workset_t *workset_clone(belady_env_t *env, struct obstack *ob, workset_t *ws) {
167 	workset_t *res = OALLOCF(ob, workset_t, vals, env->n_regs);
168 	memcpy(res, ws, sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]));
173 * Do NOT alloc anything. Make @param tgt equal to @param src.
174 * returns @param tgt for convenience
/* Copy @p src over @p tgt without allocating; caller guarantees @p tgt
 * already has capacity for env->n_regs slots. */
176 static inline workset_t *workset_copy(belady_env_t *env, workset_t *tgt, workset_t *src) {
177 	size_t size = sizeof(*src) + (env->n_regs)*sizeof(src->vals[0]);
178 	memcpy(tgt, src, size);
183 * Overwrites the current content array of @param ws with the
184 * @param count locations given at memory @param locs.
185 * Set the length of @param ws to count.
/* Overwrite the workset contents with @p count locations from @p locs
 * and set the length accordingly. No capacity check is visible here —
 * presumably the caller guarantees count <= n_regs. */
187 static inline void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs) {
188 	workset->len = count;
189 	memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
193 * Inserts the value @p val into the workset, iff it is not
194 * already contained. The workset must not be full.
/* Insert @p val into @p ws unless it is already present or does not belong
 * to the current register class. Asserts the workset is not full.
 * NOTE(review): early-return lines of the two guard paths are elided here. */
196 static inline void workset_insert(belady_env_t *env, workset_t *ws, ir_node *val) {
198 	/* check for current regclass */
199 	if (!arch_irn_consider_in_reg_alloc(env->cls, val)) {
200 		// DBG((dbg, DBG_WORKSET, "Skipped %+F\n", val));
204 	/* check if val is already contained */
205 	for(i=0; i<ws->len; ++i)
206 		if (ws->vals[i].irn == val)
	/* value is new: append it at the end */
210 	assert(ws->len < env->n_regs && "Workset already full!");
211 	ws->vals[ws->len++].irn = val;
215 * Removes all entries from this workset
217 static inline void workset_clear(workset_t *ws) {
222 * Removes the value @p val from the workset if present.
/* Remove @p val from @p ws if present, by swapping the last entry into
 * its slot (order is not preserved). */
224 static inline void workset_remove(workset_t *ws, ir_node *val) {
226 	for(i=0; i<ws->len; ++i) {
227 		if (ws->vals[i].irn == val) {
228 			ws->vals[i] = ws->vals[--ws->len];
/* Linear search for @p val; presumably returns its index or -1 if absent
 * (see the workset_contains macro below) — return lines are elided here. */
234 static inline int workset_get_index(const workset_t *ws, const ir_node *val) {
236 	for(i=0; i<ws->len; ++i) {
237 		if (ws->vals[i].irn == val)
245 * Iterates over all values in the working set.
246 * @p ws The workset to iterate
247 * @p v A variable to put the current value in
248 * @p i An integer for internal use
250 #define workset_foreach(ws, v, i) for(i=0; \
251 v=(i < ws->len) ? ws->vals[i].irn : NULL, i < ws->len; \
254 #define workset_set_time(ws, i, t) (ws)->vals[i].time=t
255 #define workset_get_time(ws, i) (ws)->vals[i].time
256 #define workset_set_length(ws, length) (ws)->len = length
257 #define workset_get_length(ws) ((ws)->len)
258 #define workset_get_val(ws, i) ((ws)->vals[i].irn)
259 #define workset_sort(ws) qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare);
260 #define workset_contains(ws, n) (workset_get_index(ws, n) >= 0)
262 typedef struct _bring_in_t bring_in_t;
264 typedef struct _block_info_t {
269 workset_t *ws_end; /**< The end set after the local belady pass. */
270 double exec_freq; /**< The execution frequency of this block. */
272 double reload_cost; /**< Cost of a reload in this block. */
273 ir_node *first_non_in; /**< First node in block which is not a phi. */
274 ir_node *last_ins; /**< The instruction before which end of
275 block reloads will be inserted. */
277 int pressure; /**< The amount of registers which remain free
278 in this block. This capacity can be used to let
279 global variables, transported into other blocks,
280 live through this block. */
282 int front_pressure; /**< The pressure right before the first
283 real (non-phi) node. At the beginning
284 of the global pass, this is 0. */
285 struct list_head br_head; /**< List head for all bring_in variables. */
286 int free_at_jump; /**< registers free at jump. */
/* Create and zero-initialize the per-block bookkeeping record for block
 * bel->blocks[id]: execution frequency, per-reload cost (reload_cost scaled
 * by frequency), free registers at the jump, and the bring-in list head.
 * The record is attached to the block via its irn link. */
290 static inline void *new_block_info(belady_env_t *bel, int id)
292 	ir_node *bl = bel->blocks[id];
293 	block_info_t *res = OALLOCZ(&bel->ob, block_info_t);
294 	res->first_non_in = NULL;
295 	res->last_ins = NULL;
299 	res->exec_freq = get_block_execfreq(bel->ef, bl);
300 	res->reload_cost = bel->arch->reload_cost * res->exec_freq;
301 	res->free_at_jump = bel->n_regs;
302 	INIT_LIST_HEAD(&res->br_head);
303 	set_irn_link(bl, res);
307 #define get_block_info(block) ((block_info_t *)get_irn_link(block))
308 #define set_block_info(block, info) set_irn_link(block, info)
/* Lazily compute (and presumably cache in bi->last_ins) the insertion point
 * before which end-of-block reloads are placed — surrounding lines elided. */
310 static inline ir_node *block_info_get_last_ins(block_info_t *bi)
313 		bi->last_ins = be_get_end_of_block_insertion_point(bi->bl);
318 typedef struct _next_use_t {
319 unsigned is_first_use : 1; /**< Indicate that this use is the first
320 in the block. Needed to identify
321 transport in values for the global
323 sched_timestep_t step; /**< The time step of the use. */
325 struct _next_use_t *next; /**< The next use int this block
329 static void *next_use_init(ir_phase *phase, const ir_node *irn, void *old)
/* Build the per-node next-use chains for block bi->bl by walking the
 * schedule backwards: for each operand, a new next_use_t is prepended whose
 * step is the current node's schedule step, so the resulting list is sorted
 * by increasing use time. The head use is marked is_first_use. */
337 static void build_next_uses(block_info_t *bi)
341 	sched_renumber(bi->bl);
343 	phase_init(&bi->next_uses, "next uses", bi->bel->irg, PHASE_DEFAULT_GROWTH, next_use_init, NULL);
344 	sched_foreach_reverse(bi->bl, irn) {
350 		for (i = get_irn_arity(irn) - 1; i >= 0; --i) {
351 			ir_node *op = get_irn_n(irn, i);
352 			next_use_t *curr = phase_get_irn_data(&bi->next_uses, op);
353 			next_use_t *use = phase_alloc(&bi->next_uses, sizeof(use[0]));
355 			use->is_first_use = 1;
356 			use->step = sched_get_time_step(irn);
	/* an older head exists: it is no longer the first use in the block */
361 				curr->is_first_use = 0;
362 				assert(curr->step >= use->step);
365 			phase_set_irn_data(&bi->next_uses, op, use);
370 #define get_current_use(bi, irn) phase_get_irn_data(&(bi)->next_uses, (irn))
/* Pop the head of irn's next-use chain so the following use becomes
 * current (used after a use of irn has been processed). */
372 static inline void advance_current_use(block_info_t *bi, const ir_node *irn)
374 	next_use_t *use = get_current_use(bi, irn);
377 	phase_set_irn_data(&bi->next_uses, irn, use->next);
/* qsort comparator on block pointers: descending execution frequency
 * (higher-frequency block sorts first). Currently unused. */
380 static __attribute__((unused)) int block_freq_gt(const void *a, const void *b)
382 	const ir_node * const *p = a;
383 	const ir_node * const *q = b;
384 	block_info_t *pi = get_block_info(*p);
385 	block_info_t *qi = get_block_info(*q);
386 	double diff = qi->exec_freq - pi->exec_freq;
387 	return (diff > 0) - (diff < 0);
/* qsort comparator on block pointers: when both frequencies fall on the
 * same side of 1.0, order by DFS post number (reverse postorder tendency);
 * otherwise fall back to descending execution frequency.
 * NOTE(review): the return using pp/pq is on lines elided from this view. */
390 static int block_freq_dfs_gt(const void *a, const void *b)
392 	const ir_node * const *p = a;
393 	const ir_node * const *q = b;
394 	block_info_t *pi = get_block_info(*p);
395 	block_info_t *qi = get_block_info(*q);
398 	if ((pi->exec_freq > 1.0 && qi->exec_freq > 1.0)
399 			|| (pi->exec_freq <= 1.0 && qi->exec_freq <= 1.0)) {
401 		const dfs_t *dfs = pi->bel->dfs;
402 		int pp = dfs_get_post_num(dfs, pi->bl);
403 		int pq = dfs_get_post_num(dfs, qi->bl);
407 	diff = qi->exec_freq - pi->exec_freq;
408 	return (diff > 0) - (diff < 0);
413 | __ ) _ __(_)_ __ __ _ |_ _|_ __
414 | _ \| '__| | '_ \ / _` | | || '_ \
415 | |_) | | | | | | | (_| | | || | | |
416 |____/|_| |_|_| |_|\__, | |___|_| |_|
419 Data structures to represent bring in variables.
423 ir_node *irn; /**< The node to bring in. */
424 block_info_t *bi; /**< The block to which bring in should happen. */
425 int pressure_so_far; /**< The maximal pressure till the first use of irn in bl. */
426 ir_node *first_use; /**< The first user of irn in bl. */
427 sched_timestep_t use_step; /**< Schedule step of the first use. */
429 int is_remat : 1; /**< Is rematerializable. */
430 int sect_pressure; /**< Offset to maximum pressure in block. */
431 struct list_head list;
432 struct list_head sect_list;
433 bring_in_t *sect_head;
/* Record that @p irn should be brought into block bi at its first use @p use:
 * snapshots the pressure seen so far and the current section (front) pressure,
 * checks rematerializability, and appends the record to the block's
 * bring-in list. */
436 static inline bring_in_t *new_bring_in(block_info_t *bi, ir_node *irn, const next_use_t *use)
438 	bring_in_t *br = OALLOC(&bi->bel->ob, bring_in_t);
441 	br->first_use = use->irn;
442 	br->use_step = use->step;
443 	br->is_remat = be_is_rematerializable(bi->bel->senv, irn, use->irn);
444 	br->pressure_so_far = bi->pressure;
445 	br->sect_pressure = bi->front_pressure;
448 	INIT_LIST_HEAD(&br->list);
449 	INIT_LIST_HEAD(&br->sect_list);
450 	list_add_tail(&br->list, &bi->br_head);
/* qsort comparator for bring_in_t* entries, priority of tie-breakers:
 * 1) remat nodes sort after non-remat ones;
 * 2) same block: earlier use step first (keeps front_pressure accounting exact);
 * 3) descending block frequency;
 * 4) frequencies equal: descending frequency of the defining block;
 * 5) still equal: DFS post number of the use block (return elided here). */
454 static int bring_in_cmp(const void *a, const void *b)
456 	const bring_in_t *p = *(const bring_in_t * const *) a;
457 	const bring_in_t *q = *(const bring_in_t * const *) b;
460 	/* if one of both is a remat node, it will be done after the other. */
461 	if (p->is_remat != q->is_remat)
462 		return p->is_remat - q->is_remat;
464 	/* in the same block, the one further in the front has to be processed first!
465 	 * Otherwise the front_pressure 'trick' is not exact. */
467 		return p->use_step - q->use_step;
469 	fp = p->bi->exec_freq;
470 	fq = q->bi->exec_freq;
472 	/* if both have the same frequency, inspect the frequency of the definition */
474 		double fdp = get_block_info(get_nodes_block(p->irn))->exec_freq;
475 		double fdq = get_block_info(get_nodes_block(q->irn))->exec_freq;
477 		/* if the defs of both have the same freq, we go for reverse dfs post order. */
479 			const dfs_t *dfs = p->bi->bel->dfs;
480 			int pp = dfs_get_post_num(dfs, p->bi->bl);
481 			int pq = dfs_get_post_num(dfs, q->bi->bl);
485 		return (fdq > fdp) - (fdq < fdp);
488 	return (fq > fp) - (fq < fp);
/* Compute the next-use distance of @p irn at the current instruction,
 * used as the eviction key in displace():
 * - non-spillable nodes get a distance that keeps them in the workset
 *   (return value on a line elided here);
 * - plain distance is use->step - curr_step;
 * - rematerializable values get REMAT_DIST if live-range extension is on;
 * - already-spilled values have their distance scaled down in priority
 *   (multiplied by already_spilled_factor);
 * - values without a further use are LIVE_END if live at block end,
 *   else DEAD. */
491 static inline unsigned get_curr_distance(block_info_t *bi, const ir_node *irn, int is_usage)
493 	belady_env_t *env = bi->bel;
494 	sched_timestep_t curr_step = sched_get_time_step(env->instr);
495 	next_use_t *use = get_current_use(bi, irn);
496 	int flags = arch_irn_get_flags(irn);
498 	assert(!arch_irn_is_ignore(irn));
500 	/* We have to keep non-spillable nodes in the working set */
501 	if(flags & arch_irn_flags_dont_spill)
504 	if (!is_usage && use && use->step == curr_step)
508 		unsigned res = use->step - curr_step;
510 		assert(use->step >= curr_step);
513 		if (remat_live_range_ext && be_is_rematerializable(env->senv, irn, use->irn))
515 		else if (bitset_contains_irn(env->spilled, irn))
516 			res *= already_spilled_factor;
522 	return be_is_live_end(env->lv, bi->bl, irn) ? LIVE_END : DEAD;
/* Return 1 iff @p irn is a Phi node placed in block @p bl itself. */
525 static inline int is_local_phi(const ir_node *bl, const ir_node *irn)
527 	return is_Phi(irn) && get_nodes_block(irn) == bl;
531 * Check, if the value is something that is transported into a block.
532 * That is, the value is defined elsewhere or defined by a Phi in the block.
533 * @param env The belady environment.
534 * @param bl The block in question.
535 * @param irn The node in question.
536 * @return 1, if node is something transported into @p bl, 0 if not.
537 * @note The function will only give correct answers in the case
538 * where @p irn is unused in the block @p bl which is always
539 * the case in our usage scenario.
/* Return 1 iff @p irn enters @p bl from outside: defined in another block,
 * or a Phi of this block (its value arrives over the CFG edges). */
541 static inline int is_transport_in(const ir_node *bl, const ir_node *irn)
543 	return get_nodes_block(irn) != bl || is_Phi(irn);
547 * Performs the actions necessary to grant the request that:
548 * - new_vals can be held in registers
549 * - as few as possible other values are disposed
550 * - the worst values get disposed
552 * @p is_usage indicates that the values in new_vals are used (not defined)
553 * In this case reloads must be performed
/* Make room in the global workset env->ws for the values in @p new_vals:
 * (1) count the values not yet in the workset; for used transport-in values
 *     seen for the first time, record a bring-in instead of a reload;
 *     otherwise schedule a reload before the current instruction;
 * (2) if the workset would overflow, key each entry by next-use distance,
 *     sort, and truncate to the allowed length (classic Belady eviction);
 * (3) insert the new values and raise the block's pressure trackers.
 * @p is_usage: the new values are uses (reloads required), not definitions. */
555 static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) {
556 	belady_env_t *env = bi->bel;
557 	workset_t *ws = env->ws;
558 	ir_node **to_insert = ALLOCAN(ir_node*, env->n_regs);
560 	int i, len, max_allowed, demand, iter;
564 	1. Identify the number of needed slots and the values to reload
567 	workset_foreach(new_vals, val, iter) {
568 		/* mark value as used */
570 		if (! workset_contains(ws, val)) {
571 			DBG((dbg, DBG_DECIDE, "\t\tinsert %+F\n", val));
572 			to_insert[demand++] = val;
574 				next_use_t *use = get_current_use(bi, val);
577 				 * if we use a value which is transported in this block, i.e. a
578 				 * phi defined here or a live in, for the first time, we check
579 				 * if there is room for that guy to survive from the block's
580 				 * entrance to here or not.
583 				assert(sched_get_time_step(env->instr) == (int) use->step);
584 				if (is_transport_in(bi->bl, val) && use->is_first_use) {
585 					bring_in_t *bri = new_bring_in(bi, val, use);
586 					bri->first_use = env->instr;
588 					/* reset the section pressure, since a new section starts. */
589 					bi->front_pressure = 0;
591 					DBG((dbg, DBG_DECIDE, "\t\tbring in node %+F, pressure %d:\n", val, bi->pressure));
592 					DBG((dbg, DBG_DECIDE, "\t\tno reload. must be considered at block start\n"));
	/* not a first-time transport-in: a real reload is needed here */
596 					bitset_add_irn(env->spilled, val);
597 					DBG((dbg, DBG_SPILL, "\t\tReload %+F before %+F\n", val, env->instr));
598 					be_add_reload(env->senv, val, env->instr, env->cls, 1);
602 			assert(is_usage && "Defined value already in workset?!?");
603 			DBG((dbg, DBG_DECIDE, "\t\tskip %+F\n", val));
606 	DBG((dbg, DBG_DECIDE, "\t\tdemand = %d\n", demand));
609 	2. Make room for at least 'demand' slots
611 	len = workset_get_length(ws);
612 	max_allowed = env->n_regs - demand;
614 	/* Only make more free room if we do not have enough */
615 	if (len > max_allowed) {
616 		DBG((dbg, DBG_DECIDE, "\t\tdisposing %d values\n", len - max_allowed));
618 		/* get current next-use distance */
619 		for (i = 0; i < ws->len; ++i) {
620 			ir_node *val = workset_get_val(ws, i);
621 			unsigned dist = get_curr_distance(bi, val, is_usage);
622 			workset_set_time(ws, i, dist);
625 		/* sort entries by increasing nextuse-distance*/
628 		/* kill the last 'demand' entries in the array */
629 		workset_set_length(ws, max_allowed);
633 	3. Insert the new values into the workset
634 	Also, we update the pressure in the block info.
635 	That is important for the global pass to decide
636 	how many values can live through the block.
638 	for (i = 0; i < demand; ++i)
639 		workset_insert(env, env->ws, to_insert[i]);
641 	/* TODO: simplify this expression? */
642 	bi->pressure = MAX(bi->pressure, workset_get_length(env->ws));
643 	bi->front_pressure = MAX(bi->front_pressure, workset_get_length(env->ws));
647 * For the given block @p block, decide for each values
648 * whether it is used from a register or is reloaded
/* Run the local Belady pass over block bel->blocks[id]:
 * walk the schedule, displace() the uses then the defs of every real
 * instruction, drop values with no further use that are not live at the
 * block end, and track the free registers at a forking jump. The final
 * workset is cloned into block_info->ws_end for the later global pass,
 * and front_pressure is reset to 0 for that pass. */
651 static void belady(belady_env_t *env, int id) {
652 	block_info_t *block_info = new_block_info(env, id);
653 	const ir_node *block = block_info->bl;
659 	DBG((dbg, DBG_WSETS, "Belady on %+F\n", block_info->bl));
660 	new_vals = new_workset(env, &env->ob);
661 	workset_clear(env->ws);
663 	/* build the next use information for this block. */
664 	build_next_uses(block_info);
667 	block_info->first_non_in = NULL;
669 	/* process the block from start to end */
670 	sched_foreach(block, irn) {
672 		assert(workset_get_length(env->ws) <= env->n_regs && "Too much values in workset!");
674 		/* Projs are handled with the tuple value.
675 		 * Phis are no real instr (see insert_starters())
676 		 * instr_nr does not increase */
677 		if (is_Proj(irn) || is_Phi(irn))
679 		DBG((dbg, DBG_DECIDE, "\t%+F\n", irn));
681 		if (!block_info->first_non_in)
682 			block_info->first_non_in = irn;
684 		/* set instruction in the workset */
687 		/* allocate all values _used_ by this instruction */
688 		workset_clear(new_vals);
689 		for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
690 			workset_insert(env, new_vals, get_irn_n(irn, i));
692 		DBG((dbg, DBG_DECIDE, "\t* uses\n"));
693 		displace(block_info, new_vals, 1);
696 		 * set all used variables to the next use in their next_use_t list
697 		 * Also, kill all dead variables from the workset. They are only
698 		 * augmenting the pressure. Note, that a variable is dead
699 		 * if it has no further use in this block and is *not* live end
701 		for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
702 			ir_node *op = get_irn_n(irn, i);
703 			next_use_t *use = get_current_use(block_info, op);
706 			if (!use->next && !be_is_live_end(env->lv, block, op))
707 				workset_remove(env->ws, op);
709 			advance_current_use(block_info, op);
712 		/* allocate all values _defined_ by this instruction */
713 		workset_clear(new_vals);
714 		if (get_irn_mode(irn) == mode_T) { /* special handling for Tuples and Projs */
715 			const ir_edge_t *edge;
717 			foreach_out_edge(irn, edge) {
718 				ir_node *proj = get_edge_src_irn(edge);
719 				workset_insert(env, new_vals, proj);
722 			workset_insert(env, new_vals, irn);
724 		DBG((dbg, DBG_DECIDE, "\t* defs\n"));
725 		displace(block_info, new_vals, 0);
	/* a forking jump consumes registers for its operands at block exit */
727 		if (is_op_forking(get_irn_op(env->instr))) {
728 			for (i = get_irn_arity(env->instr) - 1; i >= 0; --i) {
729 				ir_node *op = get_irn_n(env->instr, i);
730 				block_info->free_at_jump -= arch_irn_consider_in_reg_alloc(env->cls, op);
737 	phase_free(&block_info->next_uses);
739 	/* Remember end-workset for this block */
740 	block_info->ws_end = workset_clone(env, &env->ob, env->ws);
741 	DBG((dbg, DBG_WSETS, "End workset for %+F:\n", block));
742 	workset_foreach(block_info->ws_end, irn, iter)
743 		DBG((dbg, DBG_WSETS, "  %+F (%u)\n", irn, workset_get_time(block_info->ws_end, iter)));
744 	DBG((dbg, DBG_WSETS, "Max pressure in block: %d\n", block_info->pressure));
746 	/* now, initialize the front pressure to 0. */
747 	block_info->front_pressure = 0;
752 |_ _| |__ ___ __ _| | ___ | |__ __ _| | | _ \ __ _ _ __| |_
753 | | | '_ \ / _ \ / _` | |/ _ \| '_ \ / _` | | | |_) / _` | '__| __|
754 | | | | | | __/ | (_| | | (_) | |_) | (_| | | | __/ (_| | | | |_
755 |_| |_| |_|\___| \__, |_|\___/|_.__/ \__,_|_| |_| \__,_|_| \__|
760 #define workset_set_version(ws, i, t) ((ws)->vals[(i)].time = (t))
761 #define workset_get_version(ws, i) ((ws)->vals[(i)].time)
763 #define ver_oldest (0)
764 #define ver_youngest ((unsigned) -1)
765 #define ver_make_newer(v) ((v) + 1)
766 #define ver_is_older(v, w) ((v) < (w))
767 #define ver_is_younger(v, w) ((v) > (w))
775 typedef struct _block_state_t {
776 struct _block_state_t *next;
777 struct _block_state_t *next_intern;
780 workset_t *end_state;
783 typedef struct _irn_action_t {
784 struct _irn_action_t *next;
790 typedef struct _global_end_state_t {
798 unsigned *bs_tops_vers;
799 block_state_t **bs_tops;
800 block_state_t *bs_top;
801 irn_action_t *ia_top;
802 } global_end_state_t;
806 block_state_t *bs_top;
807 irn_action_t *ia_top;
/* Return the speculative block state for @p bi in the current transaction,
 * or NULL if the cached stack top belongs to an older version. */
810 static inline block_state_t *get_block_state(global_end_state_t *ges, const block_info_t *bi)
813 	assert(!ver_is_younger(ges->bs_tops_vers[id], ges->version));
814 	return ver_is_older(ges->bs_tops_vers[id], ges->version) ? NULL : ges->bs_tops[bi->id];
/* Return the block's current end workset: the speculative one if a state
 * exists in this transaction, otherwise the committed ws_end. */
817 static inline const workset_t *get_end_state(global_end_state_t *ges, block_info_t *bi)
819 	block_state_t *bs = get_block_state(ges, bi);
820 	return bs ? bs->end_state : bi->ws_end;
/* Push a fresh speculative state for block @p bi onto the transaction:
 * cloned from the previous speculative state if one exists, else from the
 * committed block info. Registers it in the per-block stack with the
 * current version so trans_rollback() can unwind it. */
823 static block_state_t *new_block_state(global_end_state_t *ges, block_info_t *bi)
825 	block_state_t *bs = get_block_state(ges, bi);
826 	block_state_t *nw = OALLOC(&ges->obst, block_state_t);
828 	nw->next_intern = bs;
829 	nw->next = ges->bs_top;
	/* clone from the newer of: existing speculative state / committed info */
833 		nw->pressure = bs->pressure;
834 		nw->end_state = workset_clone(ges->env, &ges->obst, bs->end_state);
837 		nw->pressure = bi->pressure;
838 		nw->end_state = workset_clone(ges->env, &ges->obst, bi->ws_end);
842 	ges->bs_tops[bi->id] = nw;
843 	ges->bs_tops_vers[bi->id] = ges->version;
/* Allocate a pending action record for node @p irn at block @p bl
 * (initially irn_act_none) and push it on the transaction's action list. */
847 static irn_action_t *new_irn_action(global_end_state_t *ges, ir_node *irn, const ir_node *bl)
849 	irn_action_t *ia = OALLOC(&ges->obst, irn_action_t);
853 	ia->act = irn_act_none;
854 	ia->next = ges->ia_top;
/* Snapshot the transaction state (obstack level, block-state and action
 * stack tops) so a failed speculation can be undone by trans_rollback(). */
859 static inline rollback_info_t trans_begin(global_end_state_t *ges)
862 	rb.obst_level = obstack_base(&ges->obst);
863 	rb.bs_top = ges->bs_top;
864 	rb.ia_top = ges->ia_top;
/* Undo everything since the matching trans_begin(): pop each block's
 * speculative state stack, restore the action/state list tops, and free
 * the obstack back to the snapshot level. */
868 static inline void trans_rollback(global_end_state_t *ges, rollback_info_t *rb)
872 	/* unwind all the stacks indiced with the block number */
873 	for (bs = ges->bs_top; bs != rb->bs_top; bs = bs->next) {
874 		unsigned id = bs->bi->id;
875 		ges->bs_tops[id] = bs->next_intern;
878 	ges->ia_top = rb->ia_top;
879 	ges->bs_top = rb->bs_top;
880 	obstack_free(&ges->obst, rb->obst_level);
884 static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level);
/* Estimate the cost of making @p irn available in a register at the end of
 * block @p bl, speculating on the transaction state in @p ges:
 * 0 if it is already in the (possibly fixed) end set; otherwise try a free
 * or evictable end-set slot and compare reloading here against bringing the
 * value in through the predecessors (can_bring_in), rolling the speculation
 * back if bring-in is not cheaper. @p limit caps the recursion cost,
 * @p level is the recursion depth used for debug indentation. */
886 static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level)
888 	block_info_t *bi = get_block_info(bl);
889 	const workset_t *end = get_end_state(ges, bi);
893 	DBG((dbg, DBG_GLOBAL, "\t%2Dcan make avail %+F at end of %+F\n", level, irn, bl));
896 	 * to make the value available at end,
897 	 * we have several cases here.
899 	 * - we already visited that block.
900 	 * - If the value is in the final end set, return 0.
901 	 *   somebody else already allocated it there.
902 	 * - If not and the final end set is already full,
903 	 *   we cannot make the value available at the end
904 	 *   of this block. return INFINITY.
905 	 * - Else (value not in final end set and there is room):
906 	 *   1) The value is in a register at the end of the local Belady pass.
907 	 *      Allocate a slot in the final end set and return 0.
908 	 *   2) The value is not in the Belady end set:
909 	 *      If the block's capacity is < k then check what it costs
910 	 *      to transport the value from upper blocks to this block.
911 	 *      Compare that against the reload cost in this block. If
912 	 *      cheaper, do the other thing. If not, reload it here.
915 	/* if the end set contains it already, it is in a reg and it costs nothing
916 	 * to load it to one. */
917 	index = workset_get_index(end, irn);
919 		unsigned ver = workset_get_version(end, index);
920 		DBG((dbg, DBG_GLOBAL, "\t%2Dnode is in the end set and is %s fixed\n",
921 					level, ver_is_older(ver, ges->version) ? "already" : "not yet"));
924 		 * if the version is older, the value is already fixed
925 		 * and cannot be removed from the end set.
927 		 * If not, we will create a new block state for that block since
928 		 * we modify it by giving the end state a new version.
930 		if (ver_is_younger(ver, ges->version)) {
931 			block_state_t *bs = new_block_state(ges, bi);
932 			workset_set_version(bs->end_state, index, ges->version);
940 	 * Now we have two options:
941 	 * 1) Reload the value at the end of the block.
942 	 *    Therefore, perhaps, we have to erase another one from the workset.
943 	 *    This may only be done if it has not been fixed.
944 	 *    Since fixed means that a previous pass has decided that that value
945 	 *    *has* to stay in the end set.
946 	 * 2) we can try, if the capacity of the block allows it, to let
947 	 *    the value live through the block and make it available at
950 	 * First, we test the local (reload in this block) alternative
951 	 * and compare against the other alternative.
952 	 * Of course, we chose the cheaper one.
956 	int n_regs = bi->free_at_jump;
957 	int len = workset_get_length(end);
964 	 * look if there is room in the end array
965 	 * for the variable. Note that this does not
966 	 * mean that the var can live through the block.
967 	 * There is just room at the *end*
970 	DBG((dbg, DBG_GLOBAL, "\t%2Dthe end set has %d free slots\n", level, n_regs - len));
	/* otherwise search for a not-yet-fixed entry we may evict */
973 		for (i = 0; i < len; ++i) {
974 			unsigned ver = workset_get_version(end, i);
975 			if (ver_is_younger(ver, ges->version))
980 		DBG((dbg, DBG_GLOBAL, "\t%2D%+F (slot %d) can be erased from the end set\n",
981 					level, end->vals[i].irn, i));
987 	 * finally there is some room. we can at least reload the value.
988 	 * but we will try to let or live through anyhow.
991 	irn_action_t *vs = new_irn_action(ges, irn, bi->bl);
992 	block_state_t *bs = new_block_state(ges, bi);
993 	workset_t *end = bs->end_state;
994 	ir_node *ins_before = block_info_get_last_ins(bi);
995 	double reload_here = be_get_reload_costs(bi->bel->senv, irn, ins_before);
996 	int pressure_ok = bs->pressure < n_regs;
998 	if (reload_here < bi->reload_cost)
1002 	 * No matter what we do, the value will be in the end set
1003 	 * if the block from now on (of course only regarding the
1004 	 * current state). Enter it and set the new length
1007 	end->vals[slot].irn = irn;
1008 	workset_set_version(end, slot, ges->version);
1009 	workset_set_length(end, MAX(workset_get_length(end), slot + 1));
1011 	vs->act = irn_act_reload;
1014 	DBG((dbg, DBG_GLOBAL, "\t%2Dthere is a free slot. capacity=%d, reload here=%f, pressure %s\n",
1015 				level, n_regs - bs->pressure, reload_here, pressure_ok ? "ok" : "insufficient"));
1018 	/* look if we can bring the value in. */
1019 	if (pressure_ok && reload_here > 0.0) {
1020 		rollback_info_t rb = trans_begin(ges);
1021 		double new_limit = MIN(reload_here, limit);
1023 		vs->act = irn_act_live_through;
1025 		res = can_bring_in(ges, bl, irn, new_limit, level + 1);
1028 		 * if bring in is too expensive re-adjust the pressure
1029 		 * and roll back the state
1031 		if (res >= reload_here) {
1033 			vs->act = irn_act_reload;
1034 			trans_rollback(ges, &rb);
1040 	DBG((dbg, DBG_GLOBAL, "\t%2D%s\n", level,
1041 				vs->act == irn_act_reload ? "reloading" : "bringing in"));
1046 	DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, res));
/* Estimate the cost of transporting @p irn into block @p bl over all CFG
 * predecessors: for a local Phi, each predecessor must supply the matching
 * Phi operand; otherwise the value itself. Costs of the predecessors are
 * summed via can_make_available_at_end(); if the sum reaches @p limit the
 * attempt is abandoned (HUGE_VAL) and the speculation rolled back. */
1050 static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level)
1052 	belady_env_t *env = ges->env;
1053 	double glob_costs = HUGE_VAL;
1055 	DBG((dbg, DBG_GLOBAL, "\t%2Dcan bring in (max %f) for %+F at block %+F\n", level, limit, irn, bl));
1057 	if (is_transport_in(bl, irn)) {
1058 		int i, n = get_irn_arity(bl);
1059 		rollback_info_t rb = trans_begin(ges);
1062 		for (i = 0; i < n; ++i) {
1063 			ir_node *pr = get_Block_cfgpred_block(bl, i);
1064 			ir_node *op = is_local_phi(bl, irn) ? get_irn_n(irn, i) : irn;
1068 			 * there might by Unknowns as operands of Phis in that case
1069 			 * we set the costs to zero, since they won't get spilled.
1071 			if (arch_irn_consider_in_reg_alloc(env->cls, op))
1072 				c = can_make_available_at_end(ges, pr, op, limit - glob_costs, level + 1);
1078 			if (glob_costs >= limit) {
1079 				glob_costs = HUGE_VAL;
1080 				trans_rollback(ges, &rb);
1087 	DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, glob_costs));
/* Make the speculative decisions permanent: execute every pending
 * irn_action (keep a live-through Phi alive / insert an end-of-block
 * reload / nothing for values already in the end set), bump the section
 * and front pressures for live-through values, then commit each touched
 * block's end workset and pressure exactly once (guarded by the
 * 'committed' bitset, which is cleared again for the next call). */
1091 static void materialize_and_commit_end_state(global_end_state_t *ges)
1093 	belady_env_t *env = ges->env;
1097 	DBG((dbg, DBG_GLOBAL, "\tmaterializing\n"));
1100 	 * Perform all the variable actions.
1102 	for (ia = ges->ia_top; ia != NULL; ia = ia->next) {
1104 			case irn_act_live_through:
1106 				block_info_t *bi = get_block_info(ia->bl);
1109 				if (is_local_phi(ia->bl, ia->irn)) {
1110 					bitset_add_irn(ges->succ_phis, ia->irn);
1111 					DBG((dbg, DBG_GLOBAL, "\t\tlive through phi kept alive: %+F\n", ia->irn));
	/* the value occupies a register through the whole block:
	 * account for it in every section's pressure */
1114 				list_for_each_entry_reverse(bring_in_t, iter, &bi->br_head, list)
1115 					++iter->sect_pressure;
1116 				++bi->front_pressure;
1119 			case irn_act_reload:
1120 				be_add_reload_at_end(env->senv, ia->irn, ia->bl, env->cls, 1);
1121 				DBG((dbg, DBG_GLOBAL, "\t\tadding reload of %+F at end of %+F\n", ia->irn, ia->bl));
1124 				DBG((dbg, DBG_GLOBAL, "\t\t%+F is in the end set of %+F\n", ia->irn, ia->bl));
1129 	 * Commit the block end states
1131 	for (bs = ges->bs_top; bs != NULL; bs = bs->next) {
1132 		block_info_t *bi = bs->bi;
1134 		if (!bitset_is_set(ges->committed, bi->id)) {
1135 			DBG((dbg, DBG_GLOBAL, "\t\tcommiting workset of %+F with version %x\n", bi->bl, ges->version));
1136 			// bes->bs->end_state->vals[idx].version = ges->version;
1137 			workset_copy(env, bi->ws_end, bs->end_state);
1138 			DBG((dbg, DBG_GLOBAL, "\t\told pressure: %d, new pressure: %d, end length: %d\n",
1139 						bi->pressure, bs->pressure, workset_get_length(bs->end_state)));
1140 			bi->pressure = bs->pressure;
1141 			bitset_set(ges->committed, bi->id);
1145 	/* clear the committed bitset. the next call is expecting it. */
1146 	bitset_clear_all(ges->committed);
/* Decide whether spilling br->irn in the bring-in block itself is cheaper
 * than spilling at its definition(s): for a local Phi, the comparison cost
 * is the summed frequency of all predecessor (definition) blocks. Returns
 * the node after which the spill may be placed, or NULL if this block is
 * not a better spill location. */
1149 static ir_node *better_spilled_here(const bring_in_t *br)
1151 	const block_info_t *bi = br->bi;
1152 	double spill_ef = get_block_info(get_nodes_block(br->irn))->exec_freq;
1155 	 * If the bring in node is a phi in the bring in block,
1156 	 * we look at all definitions and sum up their execution frequencies,
1157 	 * since spills will be placed there.
1158 	 * (except for the case where an operand is also a phi which is spilled :-( )
1159 	 * If that cost is higher than spilling the phi in that block, we opt for
1160 	 * bringing the phi into the block and spill it there.
1162 	if (is_local_phi(bi->bl, br->irn)) {
1163 		ir_node *bl = bi->bl;
1167 		for (i = get_Block_n_cfgpreds(bl) - 1; i >= 0; --i)
1168 			spill_ef += get_block_info(get_Block_cfgpred_block(bl, i))->exec_freq;
1171 	return bi->exec_freq < spill_ef ? sched_prev(bi->first_non_in) : NULL;
/* Maximum section pressure over all bring-in records from the start of the
 * block's list up to and including @p br, combined with the block's current
 * front pressure (walks the list backwards from br). */
1174 static int get_max_pressure_so_far(const block_info_t *bi, const bring_in_t *br)
1176 	const struct list_head *l;
1179 	assert(br->bi == bi);
1180 	for (l = &br->list; l != &bi->br_head; l = l->prev) {
1181 		br = list_entry(l, bring_in_t, list);
1182 		res = MAX(res, br->sect_pressure);
1185 	/* finally consider the front pressure distance and add the reference line (the global block pressure) */
1186 	return MAX(res, bi->front_pressure);
1189 #define block_last_bring_in(bi) list_entry((bi)->br_head.prev, bring_in_t, list)
/* Overall register pressure of the block: maximum of the local Belady
 * pressure and the section pressures of all bring-in records. */
1192 static int get_block_max_pressure(const block_info_t *bi)
1194 	int max = get_max_pressure_so_far(bi, block_last_bring_in(bi));
1195 	return MAX(bi->pressure, max);
1200 * Try to bring a variable into a block.
1201 * @param ges The state of all end sets.
1202 * @param block The block.
1203 * @param irn The variable.
1205 static void optimize_variable(global_end_state_t *ges, bring_in_t *br)
1207 block_info_t *bi = br->bi;
1208 ir_node *irn = br->irn;
1209 ir_node *bl = bi->bl;
1210 belady_env_t *env = ges->env;
/* Remember the obstack level so all transient allocations made while
 * evaluating this variable can be freed at the end of the function. */
1211 void *reset_level = obstack_base(&ges->obst);
/* k = number of available registers in the class. */
1212 int k = env->n_regs;
1213 int pressure_upto_use = get_max_pressure_so_far(bi, br);
1214 int front_pressure = bi->front_pressure;
1215 ir_node *better_spill_loc = NULL;
1217 assert(front_pressure <= k);
1218 assert(pressure_upto_use <= k);
1220 DBG((dbg, DBG_GLOBAL, "fixing %+F at %+F (%f), front pr: %d, pr to use: %d, first use: %x\n",
1221 irn, bl, bi->exec_freq, front_pressure, pressure_upto_use, br->first_use));
1223 // assert(!is_local_phi(bl, irn) || !bitset_contains_irn(ges->succ_phis, irn));
1226 * if we cannot bring the value to the use, let's see if it would be worthwhile
1227 * to bring the value to the beginning of the block to have a better spill
1230 * better _spilled_here will return a node where the value can be spilled after
1231 * or NULL if this block does not provide a better spill location.
1232 */
/* Only probe for a spill location if the use itself is blocked
 * (pressure full), the block front still has room, and the variable
 * has not already been spilled elsewhere. */
1234 if (pressure_upto_use >= k && front_pressure < k && !bitset_contains_irn(env->spilled, irn))
1235 better_spill_loc = better_spilled_here(br);
1239 * If either we can bring the value to the use or we should try
1240 * to bring it here to do the spill here, let's try to bring it in.
1241 */
1242 if (better_spill_loc || pressure_upto_use < k) {
1244 double bring_in_costs, local_costs;
1245 rollback_info_t trans;
1248 /* process all variables which shall be in a reg at
1249 * the beginning of the block in the order of the next use. */
/* Cost of the fallback: a plain reload right before the first use. */
1250 local_costs = be_get_reload_costs(env->senv, irn, br->first_use);
1252 /* reset the lists */
1256 /* if the variable will live into this block, we must adapt the pressure.
1257 * The new pressure is the MAX of:
1258 * 1) the total block pressure
1259 * 2) the pressure so far + the front pressure increase + 1
1261 * If the second is larger than the first,
1262 * we have to increment the total block pressure and hence
1263 * save the old pressure to restore it in case of failing to
1264 * bring the variable into the block in a register.
1265 */
/* Start a transaction so every tentative change can be rolled back
 * if bringing the variable in turns out too expensive. */
1266 trans = trans_begin(ges);
1267 bs = new_block_state(ges, bi);
1268 pressure_inc = MAX(bs->pressure, better_spill_loc ? front_pressure : pressure_upto_use + 1);
1269 bs->pressure = pressure_inc;
1272 assert(bi->pressure <= k);
1273 DBG((dbg, DBG_GLOBAL, "\ttrans in var %+F, version %x\n", irn, ges->version));
/* Recursively compute the cost of transporting the value into this
 * block (possibly through predecessors). */
1274 bring_in_costs = can_bring_in(ges, bl, irn, local_costs, 1);
1275 DBG((dbg, DBG_GLOBAL, "\tbring in: %f, local: %f\n", bring_in_costs, local_costs));
1278 * Following cases can now occur:
1279 * 1) There is room and costs ok
1280 * 2) Cannot bring to use but can spill at begin and costs are ok
1281 * 3) neither of both worked.
1283 * following actions can be taken:
1285 * b) mark phi as succeeded if node was phi
1286 * c) insert reload at use location
1287 * d) give a spill location hint
1289 * this is the case/action matrix
1296 */
1297 /* the costs were acceptable... */
1298 if (bring_in_costs < local_costs) {
1303 * case 1 and first part of case 2:
1304 * commit all the changes done. this manifests the bring-in action.
1305 * if the transport-in was a phi (that is actually used in block)
1306 * mark it in the succ_phis set to *not* phi spill it.
1307 */
1308 materialize_and_commit_end_state(ges);
1309 if (is_local_phi(bl, irn))
1310 bitset_add_irn(ges->succ_phis, irn);
1312 DBG((dbg, DBG_GLOBAL, "\t-> bring it in.", pressure_inc));
1314 /* second half of case 2 */
/* The use itself stays blocked: reload locally before the first use
 * and hint the spiller to place the spill at the block head. */
1315 if (pressure_upto_use >= k) {
1316 DBG((dbg, DBG_GLOBAL, "\t-> use blocked. local reload: %+F, try spill at: %+F\n",
1317 br->first_use, better_spill_loc));
1318 be_add_reload(env->senv, irn, br->first_use, env->cls, 1);
1319 be_add_spill(env->senv, irn, better_spill_loc);
1320 ir_nodeset_insert(env->extra_spilled, irn);
1324 * go from the last bring in use to the first and add all the variables
1325 * which additionally live through the block to their pressure.
1326 * at the point were the actually treated use is, we have to increase
1327 * the pressure by one more as the brought in value starts to count.
1328 * Finally, adjust the front pressure as well.
1329 */
1331 list_for_each_entry_reverse(bring_in_t, iter, &bi->br_head, list) {
/* From the treated use onwards the brought-in value itself also
 * occupies a register, hence the extra +1 once the condition holds. */
1333 pressure_inc += pressure_upto_use < k;
1334 iter->sect_pressure += pressure_inc;
1335 check = MAX(check, iter->sect_pressure);
1336 DBG((dbg, DBG_GLOBAL, "\tinc section pressure of %+F by %d to %d\n", iter->first_use, pressure_inc, iter->sect_pressure));
1338 bi->front_pressure += pressure_inc;
1339 assert(MAX(check, bi->front_pressure) <= bi->pressure);
1340 DBG((dbg, DBG_GLOBAL, "\t-> result: p: %d, fp: %d\n", bi->pressure, bi->front_pressure));
1343 /* case 3: nothing worked. insert normal reload and rollback. */
1345 DBG((dbg, DBG_GLOBAL, "\t-> bring in was too expensive. local reload: %+F\n", br->first_use));
1346 be_add_reload(env->senv, irn, br->first_use, env->cls, 1);
1347 bitset_add_irn(env->spilled, irn);
/* Undo all tentative workset/pressure changes of this transaction. */
1348 trans_rollback(ges, &trans);
1352 /* there was no opportunity for optimization at all. reload and be sad ... */
1354 DBG((dbg, DBG_GLOBAL, "\t-> can\'t do anything but reload before %+F\n", br->first_use));
1355 be_add_reload(env->senv, irn, br->first_use, env->cls, 1);
1356 bitset_add_irn(env->spilled, irn);
1359 DBG((dbg, DBG_GLOBAL, "\n"));
1361 /* reset the obstack and create a new version. */
/* Free all transient allocations made above and bump the version so
 * stale cached state from this run is ignored by later calls. */
1362 obstack_free(&ges->obst, reset_level);
1363 ges->version = ver_make_newer(ges->version);
/**
 * Collect all bring-in records of all blocks into one NULL-terminated
 * array and sort it with bring_in_cmp to fix the processing order of
 * the global pass.
 * @param env  The belady environment.
 * @return     NULL-terminated, sorted array of bring_in_t pointers,
 *             allocated on env->ob.
 */
1366 static bring_in_t **determine_global_order(belady_env_t *env)
/* Gather every bring-in of every block onto the obstack. */
1372 for (i = env->n_blocks - 1; i >= 0; --i) {
1373 block_info_t *bi = get_block_info(env->blocks[i]);
1374 list_for_each_entry(bring_in_t, elm, &bi->br_head, list) {
1375 obstack_ptr_grow(&env->ob, elm);
/* NULL sentinel terminates the array for the caller's iteration. */
1380 obstack_ptr_grow(&env->ob, NULL);
1381 res = obstack_finish(&env->ob);
/* NOTE(review): n is the element count excluding the sentinel —
 * its computation lies outside this excerpt; confirm against full file. */
1382 qsort(res, n, sizeof(res[0]), bring_in_cmp);
/**
 * The global pass: after the per-block belady runs, try to keep
 * variables in registers across block boundaries using the blocks'
 * remaining register capacity.
 * @param env  The belady environment.
 */
1389 static void global_assign(belady_env_t *env)
1391 ir_nodeset_iterator_t iter;
1392 global_end_state_t ges;
1398 * sort the blocks according to execution frequency.
1399 * That's not necessary for belady() but for the global pass later on.
1400 */
1401 qsort(env->blocks, env->n_blocks, sizeof(env->blocks[0]), block_freq_dfs_gt);
/* Initialize the global end-state: obstack-backed bitsets and the
 * per-block state stacks together with their version counters. */
1403 memset(&ges, 0, sizeof(ges));
1404 obstack_init(&ges.obst);
1406 ges.version = ver_make_newer(ver_oldest);
1407 ges.succ_phis = bitset_irg_obstack_alloc(&ges.obst, env->irg);
1408 ges.committed = bitset_obstack_alloc(&ges.obst, env->n_blocks);
1409 ges.bs_tops = OALLOCN(&ges.obst, block_state_t*, env->n_blocks);
1410 ges.bs_tops_vers = OALLOCN(&ges.obst, unsigned, env->n_blocks);
1412 /* invalidate all state stack pointer versions */
1413 for (i = 0; i < env->n_blocks; ++i) {
1414 block_info_t *bi = get_block_info(env->blocks[i]);
1415 ges.bs_tops_vers[i] = ver_oldest;
1417 /* Set all block end sets entries to the youngest version */
1418 for (j = workset_get_length(bi->ws_end) - 1; j >= 0; --j)
1419 workset_set_version(bi->ws_end, j, ver_youngest);
1422 /* determine order and optimize them */
/* The array from determine_global_order() is NULL-terminated. */
1423 for (br = determine_global_order(env); *br; ++br)
1424 optimize_variable(&ges, *br);
1427 * Now we spill phis which cannot be kept since they were replaced
1428 * by reloads at the block entrances.
1429 */
1430 for (i = 0; i < env->n_blocks; ++i) {
1431 ir_node *bl = env->blocks[i];
1434 sched_foreach(bl, irn) {
/* Phi-spill every allocatable node not marked as a succeeded
 * transport-in (succ_phis) by optimize_variable(). */
1438 if (arch_irn_consider_in_reg_alloc(env->cls, irn)
1439 && !bitset_contains_irn(ges.succ_phis, irn))
1440 be_spill_phi(env->senv, irn);
1444 /* check dominance for specially spilled nodes. */
1445 foreach_ir_nodeset (env->extra_spilled, irn, iter)
1446 make_spill_locations_dominate_irn(env->senv, irn);
/**
 * Block-walker callback: append each visited block to the environment's
 * obstack, building the env->blocks array used by be_spill_belady().
 * @param bl    The visited block.
 * @param data  The belady_env_t (passed as opaque walker data).
 */
1449 static void collect_blocks(ir_node *bl, void *data)
1451 belady_env_t *env = data;
1453 obstack_ptr_grow(&env->ob, bl);
1457 * Do spilling for a register class on a graph using the belady heuristic.
1458 * In the transformed graph, the register pressure never exceeds the number
1459 * of available registers.
1461 * @param birg The backend graph
1462 * @param cls The register class to spill
1464 void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls)
1466 ir_graph *irg = be_get_birg_irg(birg);
1470 /* some special classes contain only ignore regs, nothing to do then */
1471 n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
/* Reset the node links; the algorithm stores per-node info there. */
1475 be_clear_links(irg);
1477 /* init belady env */
1478 obstack_init(&env.ob);
1480 env.arch = be_get_birg_arch_env(birg);
1482 env.lv = be_get_birg_liveness(birg);
1483 env.dfs = env.lv->dfs;
1484 env.n_regs = n_regs;
1485 env.ws = new_workset(&env, &env.ob);
1486 env.senv = be_new_spill_env(birg);
1487 env.ef = be_get_birg_exec_freq(birg);
1488 env.spilled = bitset_irg_obstack_alloc(&env.ob, irg);
1489 env.extra_spilled = ir_nodeset_new(64);
/* Collect all blocks into env.blocks as a NULL-terminated array. */
1492 irg_block_walk_graph(irg, NULL, collect_blocks, &env);
1493 obstack_ptr_grow(&env.ob, NULL);
1494 env.blocks = obstack_finish(&env.ob);
1496 /* renumbering in the blocks gives nicer debug output as number are smaller. */
1497 #ifdef DEBUG_libfirm
1498 for (i = 0; i < env.n_blocks; ++i)
1499 sched_renumber(env.blocks[i]);
1502 /* Fix high register pressure in blocks with belady algorithm */
1503 for (i = 0; i < env.n_blocks; ++i)
/* Second phase: let values live through blocks with spare capacity. */
1506 global_assign(&env);
1508 /* check dominance for specially spilled nodes. */
1510 ir_nodeset_iterator_t iter;
1513 foreach_ir_nodeset (env.extra_spilled, irn, iter)
1514 make_spill_locations_dominate_irn(env.senv, irn);
1517 /* Insert spill/reload nodes into the graph and fix usages */
1518 be_insert_spills_reloads(env.senv);
/* Release all resources acquired above. */
1521 be_delete_spill_env(env.senv);
1522 ir_nodeset_del(env.extra_spilled);
1524 obstack_free(&env.ob, NULL);
/**
 * Module initializer: register the "belady2" spiller and its command line
 * options under the be.spill.belady2 option group, and set up the debug
 * module handle.  Invoked via BE_REGISTER_MODULE_CONSTRUCTOR below.
 */
1527 void be_init_spillbelady2(void)
1529 lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
1530 lc_opt_entry_t *spill_grp = lc_opt_get_grp(be_grp, "spill");
1531 lc_opt_entry_t *bel2_grp = lc_opt_get_grp(spill_grp, "belady2");
/* Static lifetime: the registry keeps a pointer to this descriptor. */
1533 static be_spiller_t belady_spiller = {
1537 lc_opt_add_table(bel2_grp, options);
1538 be_register_spiller("belady2", &belady_spiller);
1539 FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady2");
1542 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady2);