2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Belady's spill algorithm, version 2.
23 * @author Sebastian Hack, Matthias Braun, Daniel Grund
27 * The main differences from the original Belady algorithm are:
28 * - The workset is empty at the start of a block.
29 * There is no attempt to fill it with variables which
30 * are not used in the block.
31 * - There is a global pass which tries to use the remaining
32 * capacity of the blocks to let global variables live through
43 #include "irnodeset.h"
45 #include "irprintf_t.h"
51 #include "iredges_t.h"
52 #include "irphase_t.h"
60 #include "besched_t.h"
64 #include "bechordal_t.h"
65 #include "bespilloptions.h"
66 #include "beloopana.h"
72 #include "lc_opts_enum.h"
81 #define DBG_WORKSET 128
82 #define DBG_GLOBAL 256
84 #define ALREADY_SPILLED_FACTOR 2
87 #define LIVE_END (DEAD-1)
88 #define REMAT_DIST (DEAD-2)
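/*
 * Note (editorial assumption): DEAD is defined a few lines above (not shown
 * in this excerpt) and is taken to be the largest distance value, so that
 * LIVE_END = DEAD-1 and REMAT_DIST = DEAD-2 sort just below it. Since the
 * workset is sorted by ascending next-use distance and its tail is evicted,
 * dead values are evicted before live-end values, which in turn are evicted
 * before rematerializable ones.
 */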
90 static int already_spilled_factor = 2;
91 static int remat_live_range_ext = 1;
92 static int global_pass_enabled = 1;
94 static const lc_opt_table_entry_t options[] = {
95 LC_OPT_ENT_INT ("asf", "already spilled factor", &already_spilled_factor),
96 LC_OPT_ENT_BOOL ("remat", "rematerializable ops get infinitely long live ranges", &remat_live_range_ext),
97 LC_OPT_ENT_BOOL ("global", "enable/disable the global pass", &global_pass_enabled),
101 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
104 * An association between a node and a point in time.
106 typedef struct _loc_t {
107 ir_node *irn; /**< A node. */
108 unsigned time; /**< A use time.
109 In the global pass this is used
110 as the version number and not as a time.
111 Only to save space...
115 typedef struct _workset_t {
116 int len; /**< current length */
117 loc_t vals[0]; /**< inlined array of the values/distances in this working set */
120 typedef struct _belady_env_t {
124 const arch_env_t *arch;
125 const arch_register_class_t *cls;
129 ir_node **blocks; /**< Array of all blocks. */
130 int n_blocks; /**< Number of blocks in the graph. */
131 int n_regs; /**< number of regs in this reg-class */
132 workset_t *ws; /**< the main workset used while processing a block. obstack-allocated */
133 ir_node *instr; /**< current instruction */
134 int instr_nr; /**< current instruction number (relative to block start) */
136 spill_env_t *senv; /**< see bespill.h */
137 bitset_t *spilled; /**< bitset to keep all the irns which have already been spilled. */
138 ir_nodeset_t *extra_spilled; /**< All nodes for which a special spill location has been computed. */
142 static int loc_compare(const void *a, const void *b)
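/* Branchless three-way comparison: yields -1, 0 or +1 and avoids the
 * wrap-around that subtracting the two unsigned times directly could cause. */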
146 return (p->time > q->time) - (p->time < q->time);
149 static INLINE void workset_print(const workset_t *w)
153 for(i = 0; i < w->len; ++i) {
154 ir_fprintf(stderr, "%+F %d\n", w->vals[i].irn, w->vals[i].time);
159 * Alloc a new workset on obstack @p ob; its capacity is env->n_regs entries.
161 static INLINE workset_t *new_workset(belady_env_t *env, struct obstack *ob) {
163 size_t size = sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]);
164 res = obstack_alloc(ob, size);
165 memset(res, 0, size);
170 * Alloc a new instance on obstack and make it equal to @param ws
172 static INLINE workset_t *workset_clone(belady_env_t *env, struct obstack *ob, workset_t *ws) {
174 size_t size = sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]);
175 res = obstack_alloc(ob, size);
176 memcpy(res, ws, size);
181 * Do NOT alloc anything. Make @param tgt equal to @param src.
182 * returns @param tgt for convenience
184 static INLINE workset_t *workset_copy(belady_env_t *env, workset_t *tgt, workset_t *src) {
185 size_t size = sizeof(*src) + (env->n_regs)*sizeof(src->vals[0]);
186 memcpy(tgt, src, size);
191 * Overwrites the current content array of @param ws with the
192 * @param count locations given at memory @param locs.
193 * Set the length of @param ws to count.
195 static INLINE void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs) {
196 workset->len = count;
197 memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
201 * Inserts the value @p val into the workset, iff it is not
202 * already contained. The workset must not be full.
204 static INLINE void workset_insert(belady_env_t *env, workset_t *ws, ir_node *val) {
206 /* check for current regclass */
207 if (!arch_irn_consider_in_reg_alloc(env->cls, val)) {
208 // DBG((dbg, DBG_WORKSET, "Skipped %+F\n", val));
212 /* check if val is already contained */
213 for(i=0; i<ws->len; ++i)
214 if (ws->vals[i].irn == val)
218 assert(ws->len < env->n_regs && "Workset already full!");
219 ws->vals[ws->len++].irn = val;
223 * Removes all entries from this workset
225 static INLINE void workset_clear(workset_t *ws) {
230 * Removes the value @p val from the workset if present.
232 static INLINE void workset_remove(workset_t *ws, ir_node *val) {
234 for(i=0; i<ws->len; ++i) {
235 if (ws->vals[i].irn == val) {
236 ws->vals[i] = ws->vals[--ws->len];
242 static INLINE int workset_get_index(const workset_t *ws, const ir_node *val) {
244 for(i=0; i<ws->len; ++i) {
245 if (ws->vals[i].irn == val)
253 * Iterates over all values in the working set.
254 * @p ws The workset to iterate
255 * @p v A variable to put the current value in
256 * @p i An integer for internal use
258 #define workset_foreach(ws, v, i) for(i=0; \
259 v=(i < ws->len) ? ws->vals[i].irn : NULL, i < ws->len; \
262 #define workset_set_time(ws, i, t) (ws)->vals[i].time=t
263 #define workset_get_time(ws, i) (ws)->vals[i].time
264 #define workset_set_length(ws, length) (ws)->len = length
265 #define workset_get_length(ws) ((ws)->len)
266 #define workset_get_val(ws, i) ((ws)->vals[i].irn)
267 #define workset_sort(ws) qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare);
268 #define workset_contains(ws, n) (workset_get_index(ws, n) >= 0)
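/*
 * Illustrative sketch (editorial, not from the original source): how the
 * accessors above are typically combined; this mirrors their use in
 * displace() further below. The workset ws, the block info bi, the flag
 * is_usage and the target length n_keep are assumed to exist in the caller.
 *
 *     ir_node *val;
 *     int      iter;
 *
 *     workset_foreach(ws, val, iter)
 *         workset_set_time(ws, iter, get_curr_distance(bi, val, is_usage));
 *     workset_sort(ws);                 (sort by ascending next-use distance)
 *     workset_set_length(ws, n_keep);   (evict the entries farthest away)
 */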
270 typedef struct _bring_in_t bring_in_t;
272 typedef struct _block_info_t {
277 workset_t *ws_end; /**< The end set after the local Belady pass. */
278 double exec_freq; /**< The execution frequency of this block. */
280 double reload_cost; /**< Cost of a reload in this block. */
281 ir_node *first_non_in; /**< First node in block which is not a phi. */
282 ir_node *last_ins; /**< The instruction before which end of
283 block reloads will be inserted. */
285 int pressure; /**< The number of registers which remain free
286 in this block. This capacity can be used to let
287 global variables, transported into other blocks,
288 live through this block. */
290 int front_pressure; /**< The pressure right before the first
291 real (non-phi) node. At the beginning
292 of the global pass, this is 0. */
293 struct list_head br_head; /**< List head for all bring_in variables. */
294 int free_at_jump; /**< registers free at jump. */
298 static INLINE void *new_block_info(belady_env_t *bel, int id)
300 ir_node *bl = bel->blocks[id];
301 block_info_t *res = obstack_alloc(&bel->ob, sizeof(*res));
302 memset(res, 0, sizeof(res[0]));
303 res->first_non_in = NULL;
304 res->last_ins = NULL;
308 res->exec_freq = get_block_execfreq(bel->ef, bl);
309 res->reload_cost = bel->arch->reload_cost * res->exec_freq;
310 res->free_at_jump = bel->n_regs;
311 INIT_LIST_HEAD(&res->br_head);
312 set_irn_link(bl, res);
316 #define get_block_info(block) ((block_info_t *)get_irn_link(block))
317 #define set_block_info(block, info) set_irn_link(block, info)
319 static INLINE ir_node *block_info_get_last_ins(block_info_t *bi)
322 bi->last_ins = be_get_end_of_block_insertion_point(bi->bl);
327 typedef struct _next_use_t {
328 unsigned is_first_use : 1; /**< Indicates that this use is the first
329 in the block. Needed to identify
330 transport-in values for the global
332 sched_timestep_t step; /**< The time step of the use. */
334 struct _next_use_t *next; /**< The next use in this block
338 static void *next_use_init(ir_phase *phase, const ir_node *irn, void *old)
346 static void build_next_uses(block_info_t *bi)
350 sched_renumber(bi->bl);
352 phase_init(&bi->next_uses, "next uses", bi->bel->irg, PHASE_DEFAULT_GROWTH, next_use_init, NULL);
353 sched_foreach_reverse(bi->bl, irn) {
359 for (i = get_irn_arity(irn) - 1; i >= 0; --i) {
360 ir_node *op = get_irn_n(irn, i);
361 next_use_t *curr = phase_get_irn_data(&bi->next_uses, op);
362 next_use_t *use = phase_alloc(&bi->next_uses, sizeof(use[0]));
364 use->is_first_use = 1;
365 use->step = sched_get_time_step(irn);
370 curr->is_first_use = 0;
371 assert(curr->step >= use->step);
374 phase_set_irn_data(&bi->next_uses, op, use);
379 #define get_current_use(bi, irn) phase_get_irn_data(&(bi)->next_uses, (irn))
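/*
 * Sketch of the intended protocol (editorial assumption): the per-operand
 * next-use lists built by build_next_uses() are consumed front to back while
 * the block is walked in schedule order, roughly:
 *
 *     next_use_t *use = get_current_use(bi, op);
 *     ... inspect use->step and use->is_first_use ...
 *     advance_current_use(bi, op);   (pops the head of op's use list)
 */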
381 static INLINE void advance_current_use(block_info_t *bi, const ir_node *irn)
383 next_use_t *use = get_current_use(bi, irn);
386 phase_set_irn_data(&bi->next_uses, irn, use->next);
389 static __attribute__((unused)) int block_freq_gt(const void *a, const void *b)
391 const ir_node * const *p = a;
392 const ir_node * const *q = b;
393 block_info_t *pi = get_block_info(*p);
394 block_info_t *qi = get_block_info(*q);
395 double diff = qi->exec_freq - pi->exec_freq;
396 return (diff > 0) - (diff < 0);
399 static int block_freq_dfs_gt(const void *a, const void *b)
401 const ir_node * const *p = a;
402 const ir_node * const *q = b;
403 block_info_t *pi = get_block_info(*p);
404 block_info_t *qi = get_block_info(*q);
407 if ((pi->exec_freq > 1.0 && qi->exec_freq > 1.0)
408 || (pi->exec_freq <= 1.0 && qi->exec_freq <= 1.0)) {
410 const dfs_t *dfs = pi->bel->dfs;
411 int pp = dfs_get_post_num(dfs, pi->bl);
412 int pq = dfs_get_post_num(dfs, qi->bl);
416 diff = qi->exec_freq - pi->exec_freq;
417 return (diff > 0) - (diff < 0);
422 | __ ) _ __(_)_ __ __ _ |_ _|_ __
423 | _ \| '__| | '_ \ / _` | | || '_ \
424 | |_) | | | | | | | (_| | | || | | |
425 |____/|_| |_|_| |_|\__, | |___|_| |_|
428 Data structures to represent bring in variables.
432 ir_node *irn; /**< The node to bring in. */
433 block_info_t *bi; /**< The block to which bring in should happen. */
434 int pressure_so_far; /**< The maximal pressure till the first use of irn in bl. */
435 ir_node *first_use; /**< The first user of irn in bl. */
436 sched_timestep_t use_step; /**< Schedule step of the first use. */
438 int is_remat : 1; /**< Is rematerializable. */
439 int sect_pressure; /**< Offset to maximum pressure in block. */
440 struct list_head list;
441 struct list_head sect_list;
442 bring_in_t *sect_head;
445 static INLINE bring_in_t *new_bring_in(block_info_t *bi, ir_node *irn, const next_use_t *use)
447 bring_in_t *br = obstack_alloc(&bi->bel->ob, sizeof(br[0]));
451 br->first_use = use->irn;
452 br->use_step = use->step;
453 br->is_remat = be_is_rematerializable(bi->bel->senv, irn, use->irn);
454 br->pressure_so_far = bi->pressure;
455 br->sect_pressure = bi->front_pressure;
458 INIT_LIST_HEAD(&br->list);
459 INIT_LIST_HEAD(&br->sect_list);
460 list_add_tail(&br->list, &bi->br_head);
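/*
 * Editorial summary of the ordering implemented below: bring_in_cmp sorts
 * candidates such that
 *   1) non-rematerializable nodes come before rematerializable ones,
 *   2) within the same block, the earlier first use comes first,
 *   3) otherwise the higher block execution frequency comes first,
 *   4) ties are broken by the frequency of the defining block,
 *   5) and finally by reverse DFS post-order.
 */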
464 static int bring_in_cmp(const void *a, const void *b)
466 const bring_in_t *p = *(const bring_in_t * const *) a;
467 const bring_in_t *q = *(const bring_in_t * const *) b;
470 /* if one of the two is a remat node, it will be handled after the other. */
471 if (p->is_remat != q->is_remat)
472 return p->is_remat - q->is_remat;
474 /* in the same block, the one further toward the front has to be processed first!
475 * Otherwise the front_pressure 'trick' is not exact. */
477 return p->use_step - q->use_step;
479 fp = p->bi->exec_freq;
480 fq = q->bi->exec_freq;
482 /* if both have the same frequency, inspect the frequency of the definition */
484 double fdp = get_block_info(get_nodes_block(p->irn))->exec_freq;
485 double fdq = get_block_info(get_nodes_block(q->irn))->exec_freq;
487 /* if the defs of both have the same freq, we go for reverse dfs post order. */
489 const dfs_t *dfs = p->bi->bel->dfs;
490 int pp = dfs_get_post_num(dfs, p->bi->bl);
491 int pq = dfs_get_post_num(dfs, q->bi->bl);
495 return (fdq > fdp) - (fdq < fdp);
498 return (fq > fp) - (fq < fp);
501 static INLINE unsigned get_curr_distance(block_info_t *bi, const ir_node *irn, int is_usage)
503 belady_env_t *env = bi->bel;
504 sched_timestep_t curr_step = sched_get_time_step(env->instr);
505 next_use_t *use = get_current_use(bi, irn);
506 int flags = arch_irn_get_flags(irn);
508 assert(!(flags & arch_irn_flags_ignore));
510 /* We have to keep non-spillable nodes in the working set */
511 if(flags & arch_irn_flags_dont_spill)
514 if (!is_usage && use && use->step == curr_step)
518 unsigned res = use->step - curr_step;
520 assert(use->step >= curr_step);
523 if (remat_live_range_ext && be_is_rematerializable(env->senv, irn, use->irn))
525 else if (bitset_contains_irn(env->spilled, irn))
526 res *= already_spilled_factor;
532 return be_is_live_end(env->lv, bi->bl, irn) ? LIVE_END : DEAD;
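/*
 * Worked example (hypothetical numbers): if the current instruction is at
 * time step 10 and the next use of irn is at step 14, the base distance is
 * 4. If irn was spilled before, it becomes 4 * already_spilled_factor = 8
 * (with the default factor of 2), making it a more attractive eviction
 * candidate. Rematerializable values are pushed out to REMAT_DIST instead,
 * and values without any further use in the block get LIVE_END or DEAD
 * depending on whether they are live at the block end.
 */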
535 static INLINE int is_local_phi(const ir_node *bl, const ir_node *irn)
537 return is_Phi(irn) && get_nodes_block(irn) == bl;
541 * Check, if the value is something that is transported into a block.
542 * That is, the value is defined elsewhere or defined by a Phi in the block.
543 * @param env The belady environment.
544 * @param bl The block in question.
545 * @param irn The node in question.
546 * @return 1, if node is something transported into @p bl, 0 if not.
547 * @note The function will only give correct answers in the case
548 * where @p irn is used in the block @p bl, which is always
549 * the case in our usage scenario.
551 static INLINE int is_transport_in(const ir_node *bl, const ir_node *irn)
553 return get_nodes_block(irn) != bl || is_Phi(irn);
557 * Performs the actions necessary to grant the request that:
558 * - new_vals can be held in registers
559 * - as few other values as possible are disposed of
560 * - the worst values get disposed of
562 * @p is_usage indicates that the values in new_vals are used (not defined).
563 * In this case reloads must be performed.
565 static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) {
566 belady_env_t *env = bi->bel;
567 workset_t *ws = env->ws;
568 ir_node **to_insert = alloca(env->n_regs * sizeof(to_insert[0]));
570 int i, len, max_allowed, demand, iter;
574 1. Identify the number of needed slots and the values to reload
577 workset_foreach(new_vals, val, iter) {
578 /* mark value as used */
580 if (! workset_contains(ws, val)) {
581 DBG((dbg, DBG_DECIDE, "\t\tinsert %+F\n", val));
582 to_insert[demand++] = val;
584 next_use_t *use = get_current_use(bi, val);
587 * if we use a value which is transported into this block, i.e. a
588 * phi defined here or a live-in, for the first time, we check
589 * whether there is room for that value to survive from the block's
590 * entrance to here.
593 assert(sched_get_time_step(env->instr) == (int) use->step);
594 if (is_transport_in(bi->bl, val) && use->is_first_use) {
595 bring_in_t *bri = new_bring_in(bi, val, use);
596 bri->first_use = env->instr;
598 /* reset the section pressure, since a new section starts. */
599 bi->front_pressure = 0;
601 DBG((dbg, DBG_DECIDE, "\t\tbring in node %+F, pressure %d:\n", val, bi->pressure));
602 DBG((dbg, DBG_DECIDE, "\t\tno reload. must be considered at block start\n"));
606 bitset_add_irn(env->spilled, val);
607 DBG((dbg, DBG_SPILL, "\t\tReload %+F before %+F\n", val, env->instr));
608 be_add_reload(env->senv, val, env->instr, env->cls, 1);
612 assert(is_usage && "Defined value already in workset?!?");
613 DBG((dbg, DBG_DECIDE, "\t\tskip %+F\n", val));
616 DBG((dbg, DBG_DECIDE, "\t\tdemand = %d\n", demand));
619 2. Make room for at least 'demand' slots
621 len = workset_get_length(ws);
622 max_allowed = env->n_regs - demand;
624 /* Only make more free room if we do not have enough */
625 if (len > max_allowed) {
626 DBG((dbg, DBG_DECIDE, "\t\tdisposing %d values\n", len - max_allowed));
628 /* get current next-use distance */
629 for (i = 0; i < ws->len; ++i) {
630 ir_node *val = workset_get_val(ws, i);
631 unsigned dist = get_curr_distance(bi, val, is_usage);
632 workset_set_time(ws, i, dist);
635 /* sort entries by increasing next-use distance */
638 /* kill the last 'demand' entries in the array */
639 workset_set_length(ws, max_allowed);
643 3. Insert the new values into the workset
644 Also, we update the pressure in the block info.
645 That is important for the global pass to decide
646 how many values can live through the block.
648 for (i = 0; i < demand; ++i)
649 workset_insert(env, env->ws, to_insert[i]);
651 /* TODO: simplify this expression? */
652 bi->pressure = MAX(bi->pressure, workset_get_length(env->ws));
653 bi->front_pressure = MAX(bi->front_pressure, workset_get_length(env->ws));
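/*
 * Editorial note on the bookkeeping above: bi->pressure records the maximum
 * workset length seen anywhere in the block, while bi->front_pressure records
 * the maximum since the start of the current "section", i.e. since the last
 * first use of a transported-in value (where it is reset to 0). The global
 * pass uses both to judge how much free capacity a block has.
 */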
657 * For the given block @p block, decide for each value
658 * whether it is used from a register or is reloaded
661 static void belady(belady_env_t *env, int id) {
662 block_info_t *block_info = new_block_info(env, id);
663 const ir_node *block = block_info->bl;
669 DBG((dbg, DBG_WSETS, "Belady on %+F\n", block_info->bl));
670 new_vals = new_workset(env, &env->ob);
671 workset_clear(env->ws);
673 /* build the next use information for this block. */
674 build_next_uses(block_info);
677 block_info->first_non_in = NULL;
679 /* process the block from start to end */
680 sched_foreach(block, irn) {
682 assert(workset_get_length(env->ws) <= env->n_regs && "Too many values in workset!");
684 /* Projs are handled with the tuple value.
685 * Phis are no real instructions (see insert_starters()),
686 * so instr_nr does not increase */
687 if (is_Proj(irn) || is_Phi(irn))
689 DBG((dbg, DBG_DECIDE, "\t%+F\n", irn));
691 if (!block_info->first_non_in)
692 block_info->first_non_in = irn;
694 /* set instruction in the workset */
697 /* allocate all values _used_ by this instruction */
698 workset_clear(new_vals);
699 for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
700 workset_insert(env, new_vals, get_irn_n(irn, i));
702 DBG((dbg, DBG_DECIDE, "\t* uses\n"));
703 displace(block_info, new_vals, 1);
706 * set all used variables to the next use in their next_use_t list
707 * Also, kill all dead variables from the workset. They only
708 * augment the pressure. Note that a variable is dead
709 * if it has no further use in this block and is *not* live end
711 for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
712 ir_node *op = get_irn_n(irn, i);
713 next_use_t *use = get_current_use(block_info, op);
716 if (!use->next && !be_is_live_end(env->lv, block, op))
717 workset_remove(env->ws, op);
719 advance_current_use(block_info, op);
722 /* allocate all values _defined_ by this instruction */
723 workset_clear(new_vals);
724 if (get_irn_mode(irn) == mode_T) { /* special handling for tuples and projs */
725 const ir_edge_t *edge;
727 foreach_out_edge(irn, edge) {
728 ir_node *proj = get_edge_src_irn(edge);
729 workset_insert(env, new_vals, proj);
732 workset_insert(env, new_vals, irn);
734 DBG((dbg, DBG_DECIDE, "\t* defs\n"));
735 displace(block_info, new_vals, 0);
737 if (is_op_forking(get_irn_op(env->instr))) {
738 for (i = get_irn_arity(env->instr) - 1; i >= 0; --i) {
739 ir_node *op = get_irn_n(env->instr, i);
740 block_info->free_at_jump -= arch_irn_consider_in_reg_alloc(env->cls, op);
747 phase_free(&block_info->next_uses);
749 /* Remember end-workset for this block */
750 block_info->ws_end = workset_clone(env, &env->ob, env->ws);
751 DBG((dbg, DBG_WSETS, "End workset for %+F:\n", block));
752 workset_foreach(block_info->ws_end, irn, iter)
753 DBG((dbg, DBG_WSETS, " %+F (%u)\n", irn, workset_get_time(block_info->ws_end, iter)));
754 DBG((dbg, DBG_WSETS, "Max pressure in block: %d\n", block_info->pressure));
756 /* now, initialize the front pressure to 0. */
757 block_info->front_pressure = 0;
762 |_ _| |__ ___ __ _| | ___ | |__ __ _| | | _ \ __ _ _ __| |_
763 | | | '_ \ / _ \ / _` | |/ _ \| '_ \ / _` | | | |_) / _` | '__| __|
764 | | | | | | __/ | (_| | | (_) | |_) | (_| | | | __/ (_| | | | |_
765 |_| |_| |_|\___| \__, |_|\___/|_.__/ \__,_|_| |_| \__,_|_| \__|
770 #define workset_set_version(ws, i, t) ((ws)->vals[(i)].time = (t))
771 #define workset_get_version(ws, i) ((ws)->vals[(i)].time)
773 #define ver_oldest (0)
774 #define ver_youngest ((unsigned) -1)
775 #define ver_make_newer(v) ((v) + 1)
776 #define ver_is_older(v, w) ((v) < (w))
777 #define ver_is_younger(v, w) ((v) > (w))
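/*
 * Sketch of the versioning scheme (editorial reading of the macros above):
 * each optimize_variable() query bumps ges->version at its end. A workset
 * entry whose version is older than the current one was fixed by an earlier,
 * already committed query and must not be displaced; entries still at
 * ver_youngest are free, e.g.:
 *
 *     unsigned ver = workset_get_version(end, i);
 *     if (ver_is_younger(ver, ges->version))
 *         ... this slot may still be (re)used by the current query ...
 */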
785 typedef struct _block_state_t {
786 struct _block_state_t *next;
787 struct _block_state_t *next_intern;
790 workset_t *end_state;
793 typedef struct _irn_action_t {
794 struct _irn_action_t *next;
800 typedef struct _global_end_state_t {
808 unsigned *bs_tops_vers;
809 block_state_t **bs_tops;
810 block_state_t *bs_top;
811 irn_action_t *ia_top;
812 } global_end_state_t;
816 block_state_t *bs_top;
817 irn_action_t *ia_top;
820 static INLINE block_state_t *get_block_state(global_end_state_t *ges, const block_info_t *bi)
823 assert(!ver_is_younger(ges->bs_tops_vers[id], ges->version));
824 return ver_is_older(ges->bs_tops_vers[id], ges->version) ? NULL : ges->bs_tops[bi->id];
827 static INLINE const workset_t *get_end_state(global_end_state_t *ges, block_info_t *bi)
829 block_state_t *bs = get_block_state(ges, bi);
830 return bs ? bs->end_state : bi->ws_end;
833 static block_state_t *new_block_state(global_end_state_t *ges, block_info_t *bi)
835 block_state_t *bs = get_block_state(ges, bi);
836 block_state_t *nw = obstack_alloc(&ges->obst, sizeof(nw[0]));
838 nw->next_intern = bs;
839 nw->next = ges->bs_top;
843 nw->pressure = bs->pressure;
844 nw->end_state = workset_clone(ges->env, &ges->obst, bs->end_state);
847 nw->pressure = bi->pressure;
848 nw->end_state = workset_clone(ges->env, &ges->obst, bi->ws_end);
852 ges->bs_tops[bi->id] = nw;
853 ges->bs_tops_vers[bi->id] = ges->version;
857 static irn_action_t *new_irn_action(global_end_state_t *ges, ir_node *irn, const ir_node *bl)
859 irn_action_t *ia = obstack_alloc(&ges->obst, sizeof(ia[0]));
863 ia->act = irn_act_none;
864 ia->next = ges->ia_top;
869 static INLINE rollback_info_t trans_begin(global_end_state_t *ges)
872 rb.obst_level = obstack_base(&ges->obst);
873 rb.bs_top = ges->bs_top;
874 rb.ia_top = ges->ia_top;
878 static INLINE void trans_rollback(global_end_state_t *ges, rollback_info_t *rb)
882 /* unwind all the stacks indexed by the block number */
883 for (bs = ges->bs_top; bs != rb->bs_top; bs = bs->next) {
884 unsigned id = bs->bi->id;
885 ges->bs_tops[id] = bs->next_intern;
888 ges->ia_top = rb->ia_top;
889 ges->bs_top = rb->bs_top;
890 obstack_free(&ges->obst, rb->obst_level);
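/*
 * Typical transaction pattern (sketch, mirroring the use in can_bring_in()
 * below): speculative block-state changes are recorded on the obstack and on
 * the bs/ia stacks, and undone in one go if they turn out too expensive:
 *
 *     rollback_info_t rb = trans_begin(ges);
 *     double cost = can_bring_in(ges, bl, irn, limit, level + 1);
 *     if (cost >= limit)
 *         trans_rollback(ges, &rb);
 */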
894 static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level);
896 static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level)
898 block_info_t *bi = get_block_info(bl);
899 const workset_t *end = get_end_state(ges, bi);
903 DBG((dbg, DBG_GLOBAL, "\t%2Dcan make avail %+F at end of %+F\n", level, irn, bl));
906 * to make the value available at end,
907 * we have several cases here.
909 * - we already visited that block.
910 * - If the value is in the final end set, return 0.
911 * somebody else already allocated it there.
912 * - If not and the final end set is already full,
913 * we cannot make the value available at the end
914 * of this block. return INFINITY.
915 * - Else (value not in final end set and there is room):
916 * 1) The value is in a register at the end of the local Belady pass.
917 * Allocate a slot in the final end set and return 0.
918 * 2) The value is not in the Belady end set:
919 * If the block's capacity is < k then check what it costs
920 * to transport the value from upper blocks to this block.
921 * Compare that against the reload cost in this block. If
922 * transporting it is cheaper, do that. If not, reload it here.
925 /* if the end set contains it already, it is in a reg and it costs nothing
926 * to load it to one. */
927 index = workset_get_index(end, irn);
929 unsigned ver = workset_get_version(end, index);
930 DBG((dbg, DBG_GLOBAL, "\t%2Dnode is in the end set and is %s fixed\n",
931 level, ver_is_older(ver, ges->version) ? "already" : "not yet"));
934 * if the version is older, the value is already fixed
935 * and cannot be removed from the end set.
937 * If not, we will create a new block state for that block since
938 * we modify it by giving the end state a new version.
940 if (ver_is_younger(ver, ges->version)) {
941 block_state_t *bs = new_block_state(ges, bi);
942 workset_set_version(bs->end_state, index, ges->version);
950 * Now we have two options:
951 * 1) Reload the value at the end of the block.
952 * To do so, we perhaps have to erase another value from the workset.
953 * This may only be done if that value has not been fixed.
954 * Fixed means that a previous pass has decided that the value
955 * *has* to stay in the end set.
956 * 2) We can try, if the capacity of the block allows it, to let
957 * the value live through the block and make it available at
960 * First, we test the local (reload in this block) alternative
961 * and compare against the other alternative.
962 * Of course, we choose the cheaper one.
966 int n_regs = bi->free_at_jump;
967 int len = workset_get_length(end);
974 * look if there is room in the end array
975 * for the variable. Note that this does not
976 * mean that the var can live through the block.
977 * There is just room at the *end*
980 DBG((dbg, DBG_GLOBAL, "\t%2Dthe end set has %d free slots\n", level, n_regs - len));
983 for (i = 0; i < len; ++i) {
984 unsigned ver = workset_get_version(end, i);
985 if (ver_is_younger(ver, ges->version))
990 DBG((dbg, DBG_GLOBAL, "\t%2D%+F (slot %d) can be erased from the end set\n",
991 level, end->vals[i].irn, i));
997 * finally there is some room. we can at least reload the value,
998 * but we will try to let it live through anyhow.
1001 irn_action_t *vs = new_irn_action(ges, irn, bi->bl);
1002 block_state_t *bs = new_block_state(ges, bi);
1003 workset_t *end = bs->end_state;
1004 ir_node *ins_before = block_info_get_last_ins(bi);
1005 double reload_here = be_get_reload_costs(bi->bel->senv, irn, ins_before);
1006 int pressure_ok = bs->pressure < n_regs;
1008 if (reload_here < bi->reload_cost)
1012 * No matter what we do, the value will be in the end set
1013 * of the block from now on (of course only regarding the
1014 * current state). Enter it and set the new length
1017 end->vals[slot].irn = irn;
1018 workset_set_version(end, slot, ges->version);
1019 workset_set_length(end, MAX(workset_get_length(end), slot + 1));
1021 vs->act = irn_act_reload;
1024 DBG((dbg, DBG_GLOBAL, "\t%2Dthere is a free slot. capacity=%d, reload here=%f, pressure %s\n",
1025 level, n_regs - bs->pressure, reload_here, pressure_ok ? "ok" : "insufficient"));
1028 /* look if we can bring the value in. */
1029 if (pressure_ok && reload_here > 0.0) {
1030 rollback_info_t rb = trans_begin(ges);
1031 double new_limit = MIN(reload_here, limit);
1033 vs->act = irn_act_live_through;
1035 res = can_bring_in(ges, bl, irn, new_limit, level + 1);
1038 * if bringing in is too expensive, re-adjust the pressure
1039 * and roll back the state
1041 if (res >= reload_here) {
1043 vs->act = irn_act_reload;
1044 trans_rollback(ges, &rb);
1050 DBG((dbg, DBG_GLOBAL, "\t%2D%s\n", level,
1051 vs->act == irn_act_reload ? "reloading" : "bringing in"));
1056 DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, res));
1060 static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level)
1062 belady_env_t *env = ges->env;
1063 double glob_costs = HUGE_VAL;
1065 DBG((dbg, DBG_GLOBAL, "\t%2Dcan bring in (max %f) for %+F at block %+F\n", level, limit, irn, bl));
1067 if (is_transport_in(bl, irn)) {
1068 int i, n = get_irn_arity(bl);
1069 ir_node **nodes = alloca(get_irn_arity(bl) * sizeof(nodes[0]));
1070 rollback_info_t rb = trans_begin(ges);
1073 for (i = 0; i < n; ++i) {
1074 ir_node *pr = get_Block_cfgpred_block(bl, i);
1075 ir_node *op = is_local_phi(bl, irn) ? get_irn_n(irn, i) : irn;
1079 * there might be Unknowns as operands of phis; in that case
1080 * we set the costs to zero, since they won't get spilled.
1082 if (arch_irn_consider_in_reg_alloc(env->cls, op))
1083 c = can_make_available_at_end(ges, pr, op, limit - glob_costs, level + 1);
1089 if (glob_costs >= limit) {
1090 glob_costs = HUGE_VAL;
1091 trans_rollback(ges, &rb);
1098 DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, glob_costs));
1102 static void materialize_and_commit_end_state(global_end_state_t *ges)
1104 belady_env_t *env = ges->env;
1108 DBG((dbg, DBG_GLOBAL, "\tmaterializing\n"));
1111 * Perform all the variable actions.
1113 for (ia = ges->ia_top; ia != NULL; ia = ia->next) {
1115 case irn_act_live_through:
1117 block_info_t *bi = get_block_info(ia->bl);
1120 if (is_local_phi(ia->bl, ia->irn)) {
1121 bitset_add_irn(ges->succ_phis, ia->irn);
1122 DBG((dbg, DBG_GLOBAL, "\t\tlive through phi kept alive: %+F\n", ia->irn));
1125 list_for_each_entry_reverse(bring_in_t, iter, &bi->br_head, list)
1126 ++iter->sect_pressure;
1127 ++bi->front_pressure;
1130 case irn_act_reload:
1131 be_add_reload_at_end(env->senv, ia->irn, ia->bl, env->cls, 1);
1132 DBG((dbg, DBG_GLOBAL, "\t\tadding reload of %+F at end of %+F\n", ia->irn, ia->bl));
1135 DBG((dbg, DBG_GLOBAL, "\t\t%+F is in the end set of %+F\n", ia->irn, ia->bl));
1140 * Commit the block end states
1142 for (bs = ges->bs_top; bs != NULL; bs = bs->next) {
1143 block_info_t *bi = bs->bi;
1145 if (!bitset_is_set(ges->committed, bi->id)) {
1146 DBG((dbg, DBG_GLOBAL, "\t\tcommitting workset of %+F with version %x\n", bi->bl, ges->version));
1147 // bes->bs->end_state->vals[idx].version = ges->version;
1148 workset_copy(env, bi->ws_end, bs->end_state);
1149 DBG((dbg, DBG_GLOBAL, "\t\told pressure: %d, new pressure: %d, end length: %d\n",
1150 bi->pressure, bs->pressure, workset_get_length(bs->end_state)));
1151 bi->pressure = bs->pressure;
1152 bitset_set(ges->committed, bi->id);
1156 /* clear the committed bitset. the next call is expecting it. */
1157 bitset_clear_all(ges->committed);
1160 static ir_node *better_spilled_here(const bring_in_t *br)
1162 const block_info_t *bi = br->bi;
1163 double spill_ef = get_block_info(get_nodes_block(br->irn))->exec_freq;
1166 * If the bring in node is a phi in the bring in block,
1167 * we look at all definitions and sum up their execution frequencies,
1168 * since spills will be placed there.
1169 * (except for the case where an operand is also a phi which is spilled :-( )
1170 * If that cost is higher than spilling the phi in that block, we opt for
1171 * bringing the phi into the block and spilling it there.
1173 if (is_local_phi(bi->bl, br->irn)) {
1174 ir_node *bl = bi->bl;
1178 for (i = get_Block_n_cfgpreds(bl) - 1; i >= 0; --i)
1179 spill_ef += get_block_info(get_Block_cfgpred_block(bl, i))->exec_freq;
1182 return bi->exec_freq < spill_ef ? sched_prev(bi->first_non_in) : NULL;
1185 static int get_max_pressure_so_far(const block_info_t *bi, const bring_in_t *br)
1187 const struct list_head *l;
1190 assert(br->bi == bi);
1191 for (l = &br->list; l != &bi->br_head; l = l->prev) {
1192 br = list_entry(l, bring_in_t, list);
1193 res = MAX(res, br->sect_pressure);
1196 /* finally consider the front pressure distance and add the reference line (the global block pressure) */
1197 return MAX(res, bi->front_pressure);
1200 #define block_last_bring_in(bi) list_entry((bi)->br_head.prev, bring_in_t, list)
1203 static int get_block_max_pressure(const block_info_t *bi)
1205 int max = get_max_pressure_so_far(bi, block_last_bring_in(bi));
1206 return MAX(bi->pressure, max);
1211 * Try to bring a variable into a block.
1212 * @param ges The state of all end sets.
1213 * @param br The bring in descriptor, i.e. the variable and the block
1214 * to bring it into.
1216 static void optimize_variable(global_end_state_t *ges, bring_in_t *br)
1218 block_info_t *bi = br->bi;
1219 ir_node *irn = br->irn;
1220 ir_node *bl = bi->bl;
1221 belady_env_t *env = ges->env;
1222 void *reset_level = obstack_base(&ges->obst);
1223 int k = env->n_regs;
1224 int pressure_upto_use = get_max_pressure_so_far(bi, br);
1225 int front_pressure = bi->front_pressure;
1226 ir_node *better_spill_loc = NULL;
1228 assert(front_pressure <= k);
1229 assert(pressure_upto_use <= k);
1231 DBG((dbg, DBG_GLOBAL, "fixing %+F at %+F (%f), front pr: %d, pr to use: %d, first use: %x\n",
1232 irn, bl, bi->exec_freq, front_pressure, pressure_upto_use, br->first_use));
1234 // assert(!is_local_phi(bl, irn) || !bitset_contains_irn(ges->succ_phis, irn));
1237 * if we cannot bring the value to the use, let's see if it would be worthwhile
1238 * to bring the value to the beginning of the block to have a better spill
1241 * better_spilled_here will return a node after which the value can be spilled,
1242 * or NULL if this block does not provide a better spill location.
1245 if (pressure_upto_use >= k && front_pressure < k && !bitset_contains_irn(env->spilled, irn))
1246 better_spill_loc = better_spilled_here(br);
1250 * If either we can bring the value to the use or we should try
1251 * to bring it here to do the spill here, let's try to bring it in.
1253 if (better_spill_loc || pressure_upto_use < k) {
1255 double bring_in_costs, local_costs;
1256 rollback_info_t trans;
1259 /* process all variables which shall be in a reg at
1260 * the beginning of the block in the order of the next use. */
1261 local_costs = be_get_reload_costs(env->senv, irn, br->first_use);
1263 /* reset the lists */
1267 /* if the variable will live into this block, we must adapt the pressure.
1268 * The new pressure is the MAX of:
1269 * 1) the total block pressure
1270 * 2) the pressure so far + the front pressure increase + 1
1272 * If the second is larger than the first,
1273 * we have to increment the total block pressure and hence
1274 * save the old pressure to restore it in case of failing to
1275 * bring the variable into the block in a register.
1277 trans = trans_begin(ges);
1278 bs = new_block_state(ges, bi);
1279 pressure_inc = MAX(bs->pressure, better_spill_loc ? front_pressure : pressure_upto_use + 1);
1280 bs->pressure = pressure_inc;
1283 assert(bi->pressure <= k);
1284 DBG((dbg, DBG_GLOBAL, "\ttrans in var %+F, version %x\n", irn, ges->version));
1285 bring_in_costs = can_bring_in(ges, bl, irn, local_costs, 1);
1286 DBG((dbg, DBG_GLOBAL, "\tbring in: %f, local: %f\n", bring_in_costs, local_costs));
1289 * The following cases can now occur:
1290 * 1) There is room and the costs are ok
1291 * 2) Cannot bring to use but can spill at the block's begin and the costs are ok
1292 * 3) Neither of the two worked.
1294 * The following actions can be taken:
1296 * b) mark the phi as succeeded if the node was a phi
1297 * c) insert a reload at the use location
1298 * d) give a spill location hint
1300 * this is the case/action matrix
1308 /* the costs were acceptable... */
1309 if (bring_in_costs < local_costs) {
1314 * case 1 and first part of case 2:
1315 * commit all the changes done. this manifests the bring-in action.
1316 * if the transport-in was a phi (that is actually used in the block)
1317 * mark it in the succ_phis set to *not* phi-spill it.
1319 materialize_and_commit_end_state(ges);
1320 if (is_local_phi(bl, irn))
1321 bitset_add_irn(ges->succ_phis, irn);
1323 DBG((dbg, DBG_GLOBAL, "\t-> bring it in (pressure inc %d).\n", pressure_inc));
1325 /* second half of case 2 */
1326 if (pressure_upto_use >= k) {
1327 DBG((dbg, DBG_GLOBAL, "\t-> use blocked. local reload: %+F, try spill at: %+F\n",
1328 br->first_use, better_spill_loc));
1329 be_add_reload(env->senv, irn, br->first_use, env->cls, 1);
1330 be_add_spill(env->senv, irn, better_spill_loc);
1331 ir_nodeset_insert(env->extra_spilled, irn);
1335 * go from the last bring in use to the first and add all the variables
1336 * which additionally live through the block to their pressure.
1337 * at the point where the actually treated use is, we have to increase
1338 * the pressure by one more as the brought in value starts to count.
1339 * Finally, adjust the front pressure as well.
1342 list_for_each_entry_reverse(bring_in_t, iter, &bi->br_head, list) {
1344 pressure_inc += pressure_upto_use < k;
1345 iter->sect_pressure += pressure_inc;
1346 check = MAX(check, iter->sect_pressure);
1347 DBG((dbg, DBG_GLOBAL, "\tinc section pressure of %+F by %d to %d\n", iter->first_use, pressure_inc, iter->sect_pressure));
1349 bi->front_pressure += pressure_inc;
1350 assert(MAX(check, bi->front_pressure) <= bi->pressure);
1351 DBG((dbg, DBG_GLOBAL, "\t-> result: p: %d, fp: %d\n", bi->pressure, bi->front_pressure));
1354 /* case 3: nothing worked. insert normal reload and rollback. */
1356 DBG((dbg, DBG_GLOBAL, "\t-> bring in was too expensive. local reload: %+F\n", br->first_use));
1357 be_add_reload(env->senv, irn, br->first_use, env->cls, 1);
1358 bitset_add_irn(env->spilled, irn);
1359 trans_rollback(ges, &trans);
1363 /* there was no opportunity for optimization at all. reload and be sad ... */
1365 DBG((dbg, DBG_GLOBAL, "\t-> can\'t do anything but reload before %+F\n", br->first_use));
1366 be_add_reload(env->senv, irn, br->first_use, env->cls, 1);
1367 bitset_add_irn(env->spilled, irn);
1370 DBG((dbg, DBG_GLOBAL, "\n"));
1372 /* reset the obstack and create a new version. */
1373 obstack_free(&ges->obst, reset_level);
1374 ges->version = ver_make_newer(ges->version);
1377 static bring_in_t **determine_global_order(belady_env_t *env)
1383 for (i = env->n_blocks - 1; i >= 0; --i) {
1384 block_info_t *bi = get_block_info(env->blocks[i]);
1385 list_for_each_entry(bring_in_t, elm, &bi->br_head, list) {
1386 obstack_ptr_grow(&env->ob, elm);
1391 obstack_ptr_grow(&env->ob, NULL);
1392 res = obstack_finish(&env->ob);
1393 qsort(res, n, sizeof(res[0]), bring_in_cmp);
1400 static void global_assign(belady_env_t *env)
1402 ir_nodeset_iterator_t iter;
1403 global_end_state_t ges;
1409 * sort the blocks according to execution frequency.
1410 * That's not necessary for belady() but for the global pass later on.
1412 qsort(env->blocks, env->n_blocks, sizeof(env->blocks[0]), block_freq_dfs_gt);
1414 memset(&ges, 0, sizeof(ges));
1415 obstack_init(&ges.obst);
1417 ges.version = ver_make_newer(ver_oldest);
1418 ges.succ_phis = bitset_irg_obstack_alloc(&ges.obst, env->irg);
1419 ges.committed = bitset_obstack_alloc(&ges.obst, env->n_blocks);
1420 ges.bs_tops = obstack_alloc(&ges.obst, sizeof(ges.bs_tops[0]) * env->n_blocks);
1421 ges.bs_tops_vers = obstack_alloc(&ges.obst, sizeof(ges.bs_tops_vers[0]) * env->n_blocks);
1423 /* invalidate all state stack pointer versions */
1424 for (i = 0; i < env->n_blocks; ++i) {
1425 block_info_t *bi = get_block_info(env->blocks[i]);
1426 ges.bs_tops_vers[i] = ver_oldest;
1428 /* Set all block end set entries to the youngest version */
1429 for (j = workset_get_length(bi->ws_end) - 1; j >= 0; --j)
1430 workset_set_version(bi->ws_end, j, ver_youngest);
1433 /* determine the order and optimize the variables */
1434 for (br = determine_global_order(env); *br; ++br)
1435 optimize_variable(&ges, *br);
1438 * Now we spill phis which cannot be kept since they were replaced
1439 * by reloads at the block entrances.
1441 for (i = 0; i < env->n_blocks; ++i) {
1442 ir_node *bl = env->blocks[i];
1445 sched_foreach(bl, irn) {
1449 if (arch_irn_consider_in_reg_alloc(env->cls, irn)
1450 && !bitset_contains_irn(ges.succ_phis, irn))
1451 be_spill_phi(env->senv, irn);
1455 /* check dominance for specially spilled nodes. */
1456 foreach_ir_nodeset (env->extra_spilled, irn, iter)
1457 make_spill_locations_dominate_irn(env->senv, irn);
1460 static void collect_blocks(ir_node *bl, void *data)
1462 belady_env_t *env = data;
1464 obstack_ptr_grow(&env->ob, bl);
1468 * Do spilling for a register class on a graph using the Belady heuristic.
1469 * In the transformed graph, the register pressure never exceeds the number
1470 * of available registers.
1472 * @param birg The backend graph
1473 * @param cls The register class to spill
1475 void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls)
1477 ir_graph *irg = be_get_birg_irg(birg);
1481 /* some special classes contain only ignore regs, nothing to do then */
1482 n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
1486 be_clear_links(irg);
1488 /* init belady env */
1489 obstack_init(&env.ob);
1491 env.arch = be_get_birg_arch_env(birg);
1493 env.lv = be_get_birg_liveness(birg);
1494 env.dfs = env.lv->dfs;
1495 env.n_regs = n_regs;
1496 env.ws = new_workset(&env, &env.ob);
1497 env.senv = be_new_spill_env(birg);
1498 env.ef = be_get_birg_exec_freq(birg);
1499 env.spilled = bitset_irg_obstack_alloc(&env.ob, irg);
1500 env.extra_spilled = ir_nodeset_new(64);
1503 irg_block_walk_graph(irg, NULL, collect_blocks, &env);
1504 obstack_ptr_grow(&env.ob, NULL);
1505 env.blocks = obstack_finish(&env.ob);
1507 /* renumbering in the blocks gives nicer debug output as the numbers are smaller. */
1508 #ifdef DEBUG_libfirm
1509 for (i = 0; i < env.n_blocks; ++i)
1510 sched_renumber(env.blocks[i]);
1513 /* Fix high register pressure in blocks with the Belady algorithm */
1514 for (i = 0; i < env.n_blocks; ++i)
1517 global_assign(&env);
1519 /* check dominance for specially spilled nodes. */
1521 ir_nodeset_iterator_t iter;
1524 foreach_ir_nodeset (env.extra_spilled, irn, iter)
1525 make_spill_locations_dominate_irn(env.senv, irn);
1528 /* Insert spill/reload nodes into the graph and fix usages */
1529 be_insert_spills_reloads(env.senv);
1532 be_delete_spill_env(env.senv);
1533 ir_nodeset_del(env.extra_spilled);
1535 obstack_free(&env.ob, NULL);
1538 void be_init_spillbelady2(void)
1540 lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
1541 lc_opt_entry_t *spill_grp = lc_opt_get_grp(be_grp, "spill");
1542 lc_opt_entry_t *bel2_grp = lc_opt_get_grp(spill_grp, "belady2");
1544 static be_spiller_t belady_spiller = {
1548 lc_opt_add_table(bel2_grp, options);
1549 be_register_spiller("belady2", &belady_spiller);
1550 FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady2");
1553 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady2);