2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Beladys spillalgorithm version 2.
23 * @author Sebastian Hack, Matthias Braun, Daniel Grund
27 * The main differences to the original Belady are:
28 * - The workset is empty at the start of a block
29 * There is no attempt to fill it with variables which
30 * are not used in the block.
31 * - There is a global pass which tries to use the remaining
32 * capacity of the blocks to let global variables live through
41 #include "irnodeset.h"
43 #include "irprintf_t.h"
49 #include "iredges_t.h"
50 #include "irphase_t.h"
58 #include "besched_t.h"
62 #include "bechordal_t.h"
63 #include "bespilloptions.h"
64 #include "beloopana.h"
70 #include "lc_opts_enum.h"
/* Debug mask bits used with DBG() for this module's output channels. */
79 #define DBG_WORKSET 128
80 #define DBG_GLOBAL 256
/* Compile-time default for the already-spilled penalty factor (see the
 * run-time tunable of the same name below). */
82 #define ALREADY_SPILLED_FACTOR 2
/* Sentinel next-use distances derived from DEAD (defined outside this
 * excerpt): live-at-end and rematerializable values sort just below DEAD. */
85 #define LIVE_END (DEAD-1)
86 #define REMAT_DIST (DEAD-2)
/* Run-time tunables, exposed through the lc_opt table below. */
88 static int already_spilled_factor = 2;
89 static int remat_live_range_ext = 1;
90 static int global_pass_enabled = 1;
/* Option table wiring the tunables above to the "asf"/"remat"/"global"
 * command-line switches. NOTE(review): the terminating table entry lies
 * outside this excerpt. */
92 static const lc_opt_table_entry_t options[] = {
93 LC_OPT_ENT_INT ("asf", "already spilled factor", &already_spilled_factor),
94 LC_OPT_ENT_BOOL ("remat", "rematerializable ops get infinite long live ranges", &remat_live_range_ext),
95 LC_OPT_ENT_BOOL ("global", "enable/disable the global pass", &global_pass_enabled),
/* Module debug handle; only present in debug builds. */
99 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* A (node, time) pair; the unit of storage inside a workset. */
102 * An association between a node and a point in time.
104 typedef struct _loc_t {
105 ir_node *irn; /**< A node. */
106 unsigned time; /**< A use time.
107 In the global pass this is used
108 as the version number and not as a time.
109 Only to save space...
/* A register workset: a bounded set of loc_t entries. The flexible
 * trailing array is allocated with room for n_regs entries. */
113 typedef struct _workset_t {
114 int len; /**< current length */
115 loc_t vals[0]; /**< inlined array of the values/distances in this working set */
/* Per-register-class environment shared by the local and global passes.
 * NOTE(review): several fields (obstack, irg, liveness, dfs, exec-freq
 * handle) are elided from this excerpt but referenced below as env->ob,
 * env->lv, env->dfs, env->ef. */
118 typedef struct _belady_env_t {
122 const arch_env_t *arch;
123 const arch_register_class_t *cls;
127 ir_node **blocks; /**< Array of all blocks. */
128 int n_blocks; /**< Number of blocks in the graph. */
129 int n_regs; /**< number of regs in this reg-class */
130 workset_t *ws; /**< the main workset used while processing a block. ob-allocated */
131 ir_node *instr; /**< current instruction */
132 int instr_nr; /**< current instruction number (relative to block start) */
134 spill_env_t *senv; /**< see bespill.h */
135 bitset_t *spilled; /**< bitset to keep all the irns which have already been spilled. */
136 ir_nodeset_t *extra_spilled; /** All nodes for which a special spill location has been computed. */
140 static int loc_compare(const void *a, const void *b)
144 return (p->time > q->time) - (p->time < q->time);
/* Debug helper: dump every (node, time) pair of @p w to stderr. */
147 static inline void workset_print(const workset_t *w)
151 for(i = 0; i < w->len; ++i) {
152 ir_fprintf(stderr, "%+F %d\n", w->vals[i].irn, w->vals[i].time);
/* Allocate a zero-initialized workset sized for env->n_regs entries. */
157 * Alloc a new workset on obstack @p ob with maximum size @p max
159 static inline workset_t *new_workset(belady_env_t *env, struct obstack *ob) {
161 size_t size = sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]);
162 res = obstack_alloc(ob, size);
163 memset(res, 0, size);
/* Allocate a workset on @p ob and copy @p ws into it (deep copy of the
 * inlined vals array). */
168 * Alloc a new instance on obstack and make it equal to @param ws
170 static inline workset_t *workset_clone(belady_env_t *env, struct obstack *ob, workset_t *ws) {
172 size_t size = sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]);
173 res = obstack_alloc(ob, size);
174 memcpy(res, ws, size);
/* Copy @p src over @p tgt without allocating; both must have been sized
 * for env->n_regs entries. */
179 * Do NOT alloc anything. Make @param tgt equal to @param src.
180 * returns @param tgt for convenience
182 static inline workset_t *workset_copy(belady_env_t *env, workset_t *tgt, workset_t *src) {
183 size_t size = sizeof(*src) + (env->n_regs)*sizeof(src->vals[0]);
184 memcpy(tgt, src, size);
189 * Overwrites the current content array of @param ws with the
190 * @param count locations given at memory @param locs.
191 * Set the length of @param ws to count.
193 static inline void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs) {
194 workset->len = count;
195 memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
/* Insert @p val into @p ws unless it is already there or belongs to a
 * different register class. Asserts that the workset is not full. */
199 * Inserts the value @p val into the workset, iff it is not
200 * already contained. The workset must not be full.
202 static inline void workset_insert(belady_env_t *env, workset_t *ws, ir_node *val) {
204 /* check for current regclass */
205 if (!arch_irn_consider_in_reg_alloc(env->cls, val)) {
206 // DBG((dbg, DBG_WORKSET, "Skipped %+F\n", val));
210 /* check if val is already contained */
211 for(i=0; i<ws->len; ++i)
212 if (ws->vals[i].irn == val)
/* Not yet contained: append at the end (time field left untouched). */
216 assert(ws->len < env->n_regs && "Workset already full!");
217 ws->vals[ws->len++].irn = val;
/* Reset the workset to empty. */
221 * Removes all entries from this workset
223 static inline void workset_clear(workset_t *ws) {
/* Remove @p val by swapping the last entry into its slot (order is not
 * preserved; callers re-sort when they need order). */
228 * Removes the value @p val from the workset if present.
230 static inline void workset_remove(workset_t *ws, ir_node *val) {
232 for(i=0; i<ws->len; ++i) {
233 if (ws->vals[i].irn == val) {
234 ws->vals[i] = ws->vals[--ws->len];
/* Linear search for @p val; returns its index, or a negative value when
 * absent (see workset_contains below). */
240 static inline int workset_get_index(const workset_t *ws, const ir_node *val) {
242 for(i=0; i<ws->len; ++i) {
243 if (ws->vals[i].irn == val)
/* Iteration macro: yields each irn of @p ws in @p v, using @p i as the
 * cursor. @p v becomes NULL when iteration is exhausted. */
251 * Iterates over all values in the working set.
252 * @p ws The workset to iterate
253 * @p v A variable to put the current value in
254 * @p i An integer for internal use
256 #define workset_foreach(ws, v, i) for(i=0; \
257 v=(i < ws->len) ? ws->vals[i].irn : NULL, i < ws->len; \
/* Thin accessor macros over the workset representation. */
260 #define workset_set_time(ws, i, t) (ws)->vals[i].time=t
261 #define workset_get_time(ws, i) (ws)->vals[i].time
262 #define workset_set_length(ws, length) (ws)->len = length
263 #define workset_get_length(ws) ((ws)->len)
264 #define workset_get_val(ws, i) ((ws)->vals[i].irn)
/* Sort by ascending time/distance using loc_compare. */
265 #define workset_sort(ws) qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare);
266 #define workset_contains(ws, n) (workset_get_index(ws, n) >= 0)
268 typedef struct _bring_in_t bring_in_t;
/* Per-block bookkeeping shared between the local Belady pass and the
 * global pass. NOTE(review): several fields (bl, id, bel, next_uses) are
 * elided from this excerpt but used throughout the functions below. */
270 typedef struct _block_info_t {
275 workset_t *ws_end; /**< The end set after the local belady pass. */
276 double exec_freq; /**< The execution frequency of this block. */
278 double reload_cost; /**< Cost of a reload in this block. */
279 ir_node *first_non_in; /**< First node in block which is not a phi. */
280 ir_node *last_ins; /**< The instruction before which end of
281 block reloads will be inserted. */
283 int pressure; /**< The amount of registers which remain free
284 in this block. This capacity can be used to let
285 global variables, transported into other blocks,
286 live through this block. */
288 int front_pressure; /**< The pressure right before the first
289 real (non-phi) node. At the beginning
290 of the global pass, this is 0. */
291 struct list_head br_head; /**< List head for all bring_in variables. */
292 int free_at_jump; /**< registers free at jump. */
/* Allocate and initialize the block_info_t for block number @p id and
 * attach it to the block via its link field. */
296 static inline void *new_block_info(belady_env_t *bel, int id)
298 ir_node *bl = bel->blocks[id];
299 block_info_t *res = obstack_alloc(&bel->ob, sizeof(*res));
300 memset(res, 0, sizeof(res[0]));
301 res->first_non_in = NULL;
302 res->last_ins = NULL;
/* Derived per-block costs: reload cost is scaled by execution frequency. */
306 res->exec_freq = get_block_execfreq(bel->ef, bl);
307 res->reload_cost = bel->arch->reload_cost * res->exec_freq;
308 res->free_at_jump = bel->n_regs;
309 INIT_LIST_HEAD(&res->br_head);
310 set_irn_link(bl, res);
/* The block's link pointer doubles as the block_info_t storage. */
314 #define get_block_info(block) ((block_info_t *)get_irn_link(block))
315 #define set_block_info(block, info) set_irn_link(block, info)
/* Lazily compute and cache the end-of-block insertion point. */
317 static inline ir_node *block_info_get_last_ins(block_info_t *bi)
320 bi->last_ins = be_get_end_of_block_insertion_point(bi->bl);
/* A per-operand linked list of use time steps inside one block, built
 * back-to-front so the head is always the next (earliest) use. */
325 typedef struct _next_use_t {
326 unsigned is_first_use : 1; /**< Indicate that this use is the first
327 in the block. Needed to identify
328 transport in values for the global
330 sched_timestep_t step; /**< The time step of the use. */
332 struct _next_use_t *next; /**< The next use int this block
/* Phase init callback; NOTE(review): its body is elided here. */
336 static void *next_use_init(ir_phase *phase, const ir_node *irn, void *old)
/* Walk the schedule of bi->bl in reverse and record, for every operand,
 * the chain of its uses (earliest first). */
344 static void build_next_uses(block_info_t *bi)
348 sched_renumber(bi->bl);
350 phase_init(&bi->next_uses, "next uses", bi->bel->irg, PHASE_DEFAULT_GROWTH, next_use_init, NULL);
351 sched_foreach_reverse(bi->bl, irn) {
357 for (i = get_irn_arity(irn) - 1; i >= 0; --i) {
358 ir_node *op = get_irn_n(irn, i);
359 next_use_t *curr = phase_get_irn_data(&bi->next_uses, op);
360 next_use_t *use = phase_alloc(&bi->next_uses, sizeof(use[0]));
/* New head of the chain; as long as no later-recorded use exists it is
 * the first use in the block. */
362 use->is_first_use = 1;
363 use->step = sched_get_time_step(irn);
/* A previously recorded (later) use exists: it is no longer first. */
368 curr->is_first_use = 0;
369 assert(curr->step >= use->step);
372 phase_set_irn_data(&bi->next_uses, op, use);
/* Head of the use chain for @p irn, i.e. its next use from "now". */
377 #define get_current_use(bi, irn) phase_get_irn_data(&(bi)->next_uses, (irn))
/* Pop the head of the use chain after the current use was processed. */
379 static inline void advance_current_use(block_info_t *bi, const ir_node *irn)
381 next_use_t *use = get_current_use(bi, irn);
384 phase_set_irn_data(&bi->next_uses, irn, use->next);
/* qsort comparator: order blocks by decreasing execution frequency.
 * Currently unused (hence the attribute) but kept as an alternative to
 * block_freq_dfs_gt below. */
387 static __attribute__((unused)) int block_freq_gt(const void *a, const void *b)
389 const ir_node * const *p = a;
390 const ir_node * const *q = b;
391 block_info_t *pi = get_block_info(*p);
392 block_info_t *qi = get_block_info(*q);
393 double diff = qi->exec_freq - pi->exec_freq;
394 return (diff > 0) - (diff < 0);
/* qsort comparator: like block_freq_gt, but when both blocks are on the
 * same side of frequency 1.0, fall back to reverse DFS post-order so
 * processing follows the control flow structure. */
397 static int block_freq_dfs_gt(const void *a, const void *b)
399 const ir_node * const *p = a;
400 const ir_node * const *q = b;
401 block_info_t *pi = get_block_info(*p);
402 block_info_t *qi = get_block_info(*q);
405 if ((pi->exec_freq > 1.0 && qi->exec_freq > 1.0)
406 || (pi->exec_freq <= 1.0 && qi->exec_freq <= 1.0)) {
408 const dfs_t *dfs = pi->bel->dfs;
409 int pp = dfs_get_post_num(dfs, pi->bl);
410 int pq = dfs_get_post_num(dfs, qi->bl);
/* Frequencies straddle 1.0: decide by frequency alone. */
414 diff = qi->exec_freq - pi->exec_freq;
415 return (diff > 0) - (diff < 0);
420 | __ ) _ __(_)_ __ __ _ |_ _|_ __
421 | _ \| '__| | '_ \ / _` | | || '_ \
422 | |_) | | | | | | | (_| | | || | | |
423 |____/|_| |_|_| |_|\__, | |___|_| |_|
426 Data structures to represent bring in variables.
/* A candidate variable that the global pass may "bring in" to a block,
 * i.e. keep in a register from block entry to its first use. */
430 ir_node *irn; /**< The node to bring in. */
431 block_info_t *bi; /**< The block to which bring in should happen. */
432 int pressure_so_far; /**< The maximal pressure till the first use of irn in bl. */
433 ir_node *first_use; /**< The first user of irn in bl. */
434 sched_timestep_t use_step; /**< Schedule step of the first use. */
436 int is_remat : 1; /**< Is rematerializable. */
437 int sect_pressure; /**< Offset to maximum pressure in block. */
438 struct list_head list;
439 struct list_head sect_list;
440 bring_in_t *sect_head;
/* Create a bring_in record for @p irn in block @p bi, snapshotting the
 * current pressure values, and queue it on the block's br_head list. */
443 static inline bring_in_t *new_bring_in(block_info_t *bi, ir_node *irn, const next_use_t *use)
445 bring_in_t *br = obstack_alloc(&bi->bel->ob, sizeof(br[0]));
/* NOTE(review): next_use_t as shown above has no irn field; presumably
 * it carries the using node in an elided member — confirm in full file. */
449 br->first_use = use->irn;
450 br->use_step = use->step;
451 br->is_remat = be_is_rematerializable(bi->bel->senv, irn, use->irn);
452 br->pressure_so_far = bi->pressure;
453 br->sect_pressure = bi->front_pressure;
456 INIT_LIST_HEAD(&br->list);
457 INIT_LIST_HEAD(&br->sect_list);
458 list_add_tail(&br->list, &bi->br_head);
/* qsort comparator establishing the processing order of bring_in
 * candidates: non-remats first, then earlier uses within a block, then
 * higher block frequency, then higher definition-block frequency, and
 * finally reverse DFS post-order as the tie breaker. */
462 static int bring_in_cmp(const void *a, const void *b)
464 const bring_in_t *p = *(const bring_in_t * const *) a;
465 const bring_in_t *q = *(const bring_in_t * const *) b;
468 /* if one of both is a remat node, it will be done after the other. */
469 if (p->is_remat != q->is_remat)
470 return p->is_remat - q->is_remat;
472 /* in the same block, the one further in the front has to be processed first!
473 * Otherwise the front_pressure 'trick' is not exact. */
475 return p->use_step - q->use_step;
477 fp = p->bi->exec_freq;
478 fq = q->bi->exec_freq;
480 /* if both have the same frequency, inspect the frequency of the definition */
482 double fdp = get_block_info(get_nodes_block(p->irn))->exec_freq;
483 double fdq = get_block_info(get_nodes_block(q->irn))->exec_freq;
485 /* if the defs of both have the same freq, we go for reverse dfs post order. */
487 const dfs_t *dfs = p->bi->bel->dfs;
488 int pp = dfs_get_post_num(dfs, p->bi->bl);
489 int pq = dfs_get_post_num(dfs, q->bi->bl);
493 return (fdq > fdp) - (fdq < fdp);
496 return (fq > fp) - (fq < fp);
/* Compute the next-use distance of @p irn at the current instruction,
 * the quantity Belady's heuristic sorts by. Non-spillable values get the
 * smallest distance (kept), remat values an extended one, already-spilled
 * values a penalized one; unused values are LIVE_END or DEAD. */
499 static inline unsigned get_curr_distance(block_info_t *bi, const ir_node *irn, int is_usage)
501 belady_env_t *env = bi->bel;
502 sched_timestep_t curr_step = sched_get_time_step(env->instr);
503 next_use_t *use = get_current_use(bi, irn);
504 int flags = arch_irn_get_flags(irn);
506 assert(!(flags & arch_irn_flags_ignore));
508 /* We have to keep non-spillable nodes in the working set */
509 if(flags & arch_irn_flags_dont_spill)
/* A use at exactly the current step while scanning a def counts as now. */
512 if (!is_usage && use && use->step == curr_step)
516 unsigned res = use->step - curr_step;
518 assert(use->step >= curr_step);
521 if (remat_live_range_ext && be_is_rematerializable(env->senv, irn, use->irn))
523 else if (bitset_contains_irn(env->spilled, irn))
524 res *= already_spilled_factor;
/* No further use in this block: dead, unless live at the block end. */
530 return be_is_live_end(env->lv, bi->bl, irn) ? LIVE_END : DEAD;
533 static inline int is_local_phi(const ir_node *bl, const ir_node *irn)
535 return is_Phi(irn) && get_nodes_block(irn) == bl;
539 * Check, if the value is something that is transported into a block.
540 * That is, the value is defined elsewhere or defined by a Phi in the block.
541 * @param env The belady environment.
542 * @param bl The block in question.
543 * @param irn The node in question.
544 * @return 1, if node is something transported into @p bl, 0 if not.
545 * @note The function will only give correct answers in the case
546 * where @p irn is unused in the block @p bl which is always
547 * the case in our usage scenario.
549 static inline int is_transport_in(const ir_node *bl, const ir_node *irn)
551 return get_nodes_block(irn) != bl || is_Phi(irn);
/* Core Belady step: make room in the workset for the values in
 * @p new_vals, evicting the entries with the largest next-use distance,
 * and record reloads / bring-in candidates as needed. Also maintains the
 * block's pressure statistics for the later global pass. */
555 * Performs the actions necessary to grant the request that:
556 * - new_vals can be held in registers
557 * - as few as possible other values are disposed
558 * - the worst values get disposed
560 * @p is_usage indicates that the values in new_vals are used (not defined)
561 * In this case reloads must be performed
563 static void displace(block_info_t *bi, workset_t *new_vals, int is_usage) {
564 belady_env_t *env = bi->bel;
565 workset_t *ws = env->ws;
566 ir_node **to_insert = alloca(env->n_regs * sizeof(to_insert[0]));
568 int i, len, max_allowed, demand, iter;
572 1. Identify the number of needed slots and the values to reload
575 workset_foreach(new_vals, val, iter) {
576 /* mark value as used */
578 if (! workset_contains(ws, val)) {
579 DBG((dbg, DBG_DECIDE, "\t\tinsert %+F\n", val));
580 to_insert[demand++] = val;
582 next_use_t *use = get_current_use(bi, val);
585 * if we use a value which is transported in this block, i.e. a
586 * phi defined here or a live in, for the first time, we check
587 * if there is room for that guy to survive from the block's
588 * entrance to here or not.
591 assert(sched_get_time_step(env->instr) == (int) use->step);
592 if (is_transport_in(bi->bl, val) && use->is_first_use) {
/* Transported-in value at its first use: no reload here; it becomes a
 * bring-in candidate decided by the global pass. */
593 bring_in_t *bri = new_bring_in(bi, val, use);
594 bri->first_use = env->instr;
596 /* reset the section pressure, since a new section starts. */
597 bi->front_pressure = 0;
599 DBG((dbg, DBG_DECIDE, "\t\tbring in node %+F, pressure %d:\n", val, bi->pressure));
600 DBG((dbg, DBG_DECIDE, "\t\tno reload. must be considered at block start\n"));
/* Otherwise: schedule a reload before the current instruction and
 * remember that the value has been spilled once. */
604 bitset_add_irn(env->spilled, val);
605 DBG((dbg, DBG_SPILL, "\t\tReload %+F before %+F\n", val, env->instr));
606 be_add_reload(env->senv, val, env->instr, env->cls, 1);
610 assert(is_usage && "Defined value already in workset?!?");
611 DBG((dbg, DBG_DECIDE, "\t\tskip %+F\n", val));
614 DBG((dbg, DBG_DECIDE, "\t\tdemand = %d\n", demand));
617 2. Make room for at least 'demand' slots
619 len = workset_get_length(ws);
620 max_allowed = env->n_regs - demand;
622 /* Only make more free room if we do not have enough */
623 if (len > max_allowed) {
624 DBG((dbg, DBG_DECIDE, "\t\tdisposing %d values\n", len - max_allowed));
626 /* get current next-use distance */
627 for (i = 0; i < ws->len; ++i) {
628 ir_node *val = workset_get_val(ws, i);
629 unsigned dist = get_curr_distance(bi, val, is_usage);
630 workset_set_time(ws, i, dist);
633 /* sort entries by increasing nextuse-distance*/
636 /* kill the last 'demand' entries in the array */
637 workset_set_length(ws, max_allowed);
641 3. Insert the new values into the workset
642 Also, we update the pressure in the block info.
643 That is important for the global pass to decide
644 how many values can live through the block.
646 for (i = 0; i < demand; ++i)
647 workset_insert(env, env->ws, to_insert[i]);
649 /* TODO: simplify this expression? */
650 bi->pressure = MAX(bi->pressure, workset_get_length(env->ws));
651 bi->front_pressure = MAX(bi->front_pressure, workset_get_length(env->ws));
/* Local Belady pass over one block: walk the schedule, reserve workset
 * slots for uses and defs via displace(), drop dead values, and save the
 * resulting end workset in the block info for the global pass. */
655 * For the given block @p block, decide for each values
656 * whether it is used from a register or is reloaded
659 static void belady(belady_env_t *env, int id) {
660 block_info_t *block_info = new_block_info(env, id);
661 const ir_node *block = block_info->bl;
667 DBG((dbg, DBG_WSETS, "Belady on %+F\n", block_info->bl));
668 new_vals = new_workset(env, &env->ob);
669 workset_clear(env->ws);
671 /* build the next use information for this block. */
672 build_next_uses(block_info);
675 block_info->first_non_in = NULL;
677 /* process the block from start to end */
678 sched_foreach(block, irn) {
680 assert(workset_get_length(env->ws) <= env->n_regs && "Too much values in workset!");
682 /* Projs are handled with the tuple value.
683 * Phis are no real instr (see insert_starters())
684 * instr_nr does not increase */
685 if (is_Proj(irn) || is_Phi(irn))
687 DBG((dbg, DBG_DECIDE, "\t%+F\n", irn));
689 if (!block_info->first_non_in)
690 block_info->first_non_in = irn;
692 /* set instruction in the workset */
695 /* allocate all values _used_ by this instruction */
696 workset_clear(new_vals);
697 for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
698 workset_insert(env, new_vals, get_irn_n(irn, i));
700 DBG((dbg, DBG_DECIDE, "\t* uses\n"));
701 displace(block_info, new_vals, 1);
704 * set all used variables to the next use in their next_use_t list
705 * Also, kill all dead variables from the workset. They are only
706 * augmenting the pressure. Note, that a variable is dead
707 * if it has no further use in this block and is *not* live end
709 for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
710 ir_node *op = get_irn_n(irn, i);
711 next_use_t *use = get_current_use(block_info, op);
714 if (!use->next && !be_is_live_end(env->lv, block, op))
715 workset_remove(env->ws, op);
717 advance_current_use(block_info, op);
720 /* allocate all values _defined_ by this instruction */
721 workset_clear(new_vals);
722 if (get_irn_mode(irn) == mode_T) { /* special handling for Tuples and Projs */
723 const ir_edge_t *edge;
725 foreach_out_edge(irn, edge) {
726 ir_node *proj = get_edge_src_irn(edge);
727 workset_insert(env, new_vals, proj);
730 workset_insert(env, new_vals, irn);
732 DBG((dbg, DBG_DECIDE, "\t* defs\n"));
733 displace(block_info, new_vals, 0);
/* Forking control flow: operands of the branch still occupy registers
 * at the jump, so reduce the block's free_at_jump capacity. */
735 if (is_op_forking(get_irn_op(env->instr))) {
736 for (i = get_irn_arity(env->instr) - 1; i >= 0; --i) {
737 ir_node *op = get_irn_n(env->instr, i);
738 block_info->free_at_jump -= arch_irn_consider_in_reg_alloc(env->cls, op);
745 phase_free(&block_info->next_uses);
747 /* Remember end-workset for this block */
748 block_info->ws_end = workset_clone(env, &env->ob, env->ws);
749 DBG((dbg, DBG_WSETS, "End workset for %+F:\n", block));
750 workset_foreach(block_info->ws_end, irn, iter)
751 DBG((dbg, DBG_WSETS, " %+F (%u)\n", irn, workset_get_time(block_info->ws_end, iter)));
752 DBG((dbg, DBG_WSETS, "Max pressure in block: %d\n", block_info->pressure));
754 /* now, initialize the front pressure to 0. */
755 block_info->front_pressure = 0;
760 |_ _| |__ ___ __ _| | ___ | |__ __ _| | | _ \ __ _ _ __| |_
761 | | | '_ \ / _ \ / _` | |/ _ \| '_ \ / _` | | | |_) / _` | '__| __|
762 | | | | | | __/ | (_| | | (_) | |_) | (_| | | | __/ (_| | | | |_
763 |_| |_| |_|\___| \__, |_|\___/|_.__/ \__,_|_| |_| \__,_|_| \__|
/* In the global pass the loc_t time field is reinterpreted as a version
 * number; these macros make that reinterpretation explicit. */
768 #define workset_set_version(ws, i, t) ((ws)->vals[(i)].time = (t))
769 #define workset_get_version(ws, i) ((ws)->vals[(i)].time)
/* Version ordering helpers: entries older than the current transaction
 * version are fixed, younger ones are still tentative. */
771 #define ver_oldest (0)
772 #define ver_youngest ((unsigned) -1)
773 #define ver_make_newer(v) ((v) + 1)
774 #define ver_is_older(v, w) ((v) < (w))
775 #define ver_is_younger(v, w) ((v) > (w))
/* Tentative per-block end state inside one global-pass transaction; the
 * next_intern chain stacks states for the same block so they can be
 * unwound on rollback. NOTE(review): pressure/bi members are elided. */
783 typedef struct _block_state_t {
784 struct _block_state_t *next;
785 struct _block_state_t *next_intern;
788 workset_t *end_state;
/* A pending per-node action (reload at end / live-through), committed or
 * rolled back as a whole. NOTE(review): irn/bl/act members are elided. */
791 typedef struct _irn_action_t {
792 struct _irn_action_t *next;
/* Transaction state of the global pass: versioned per-block state stacks
 * plus the action list heads. */
798 typedef struct _global_end_state_t {
806 unsigned *bs_tops_vers;
807 block_state_t **bs_tops;
808 block_state_t *bs_top;
809 irn_action_t *ia_top;
810 } global_end_state_t;
/* Snapshot of the transaction stacks, restored by trans_rollback().
 * NOTE(review): the rollback_info_t header and obst_level member are
 * elided from this excerpt. */
814 block_state_t *bs_top;
815 irn_action_t *ia_top;
/* Return the block's state for the current version, or NULL if the block
 * has not been touched in this transaction yet. */
818 static inline block_state_t *get_block_state(global_end_state_t *ges, const block_info_t *bi)
821 assert(!ver_is_younger(ges->bs_tops_vers[id], ges->version));
822 return ver_is_older(ges->bs_tops_vers[id], ges->version) ? NULL : ges->bs_tops[bi->id];
/* The effective end workset: the tentative one if present, else the one
 * computed by the local Belady pass. */
825 static inline const workset_t *get_end_state(global_end_state_t *ges, block_info_t *bi)
827 block_state_t *bs = get_block_state(ges, bi);
828 return bs ? bs->end_state : bi->ws_end;
/* Push a fresh, writable copy of the block's current state (tentative or
 * committed) onto the transaction stacks and mark it with the current
 * version. */
831 static block_state_t *new_block_state(global_end_state_t *ges, block_info_t *bi)
833 block_state_t *bs = get_block_state(ges, bi);
834 block_state_t *nw = obstack_alloc(&ges->obst, sizeof(nw[0]));
836 nw->next_intern = bs;
837 nw->next = ges->bs_top;
841 nw->pressure = bs->pressure;
842 nw->end_state = workset_clone(ges->env, &ges->obst, bs->end_state);
845 nw->pressure = bi->pressure;
846 nw->end_state = workset_clone(ges->env, &ges->obst, bi->ws_end);
850 ges->bs_tops[bi->id] = nw;
851 ges->bs_tops_vers[bi->id] = ges->version;
/* Record a pending action for @p irn in block @p bl; starts as a no-op
 * (irn_act_none) and is upgraded by the decision logic below. */
855 static irn_action_t *new_irn_action(global_end_state_t *ges, ir_node *irn, const ir_node *bl)
857 irn_action_t *ia = obstack_alloc(&ges->obst, sizeof(ia[0]));
861 ia->act = irn_act_none;
862 ia->next = ges->ia_top;
/* Begin a nested transaction: snapshot obstack level and stack tops. */
867 static inline rollback_info_t trans_begin(global_end_state_t *ges)
870 rb.obst_level = obstack_base(&ges->obst);
871 rb.bs_top = ges->bs_top;
872 rb.ia_top = ges->ia_top;
/* Undo everything since the matching trans_begin(): pop the per-block
 * state stacks, restore the action list and free the obstack tail. */
876 static inline void trans_rollback(global_end_state_t *ges, rollback_info_t *rb)
880 /* unwind all the stacks indiced with the block number */
881 for (bs = ges->bs_top; bs != rb->bs_top; bs = bs->next) {
882 unsigned id = bs->bi->id;
883 ges->bs_tops[id] = bs->next_intern;
886 ges->ia_top = rb->ia_top;
887 ges->bs_top = rb->bs_top;
888 obstack_free(&ges->obst, rb->obst_level);
892 static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level);
/* Estimate (and tentatively record) the cost of having @p irn in a
 * register at the end of @p bl, choosing between reloading it at the
 * block end and letting it live through from the predecessors (mutual
 * recursion with can_bring_in). Costs above @p limit abort the attempt. */
894 static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level)
896 block_info_t *bi = get_block_info(bl);
897 const workset_t *end = get_end_state(ges, bi);
901 DBG((dbg, DBG_GLOBAL, "\t%2Dcan make avail %+F at end of %+F\n", level, irn, bl));
904 * to make the value available at end,
905 * we have several cases here.
907 * - we already visited that block.
908 * - If the value is in the final end set, return 0.
909 * somebody else already allocated it there.
910 * - If not and the final end set is already full,
911 * we cannot make the value available at the end
912 * of this block. return INFINITY.
913 * - Else (value not in final end set and there is room):
914 * 1) The value is in a register at the end of the local Belady pass.
915 * Allocate a slot in the final end set and return 0.
916 * 2) The value is not in the Belady end set:
917 * If the block's capacity is < k then check what it costs
918 * to transport the value from upper blocks to this block.
919 * Compare that against the reload cost in this block. If
920 * cheaper, do the other thing. If not, reload it here.
923 /* if the end set contains it already, it is in a reg and it costs nothing
924 * to load it to one. */
925 index = workset_get_index(end, irn);
927 unsigned ver = workset_get_version(end, index);
928 DBG((dbg, DBG_GLOBAL, "\t%2Dnode is in the end set and is %s fixed\n",
929 level, ver_is_older(ver, ges->version) ? "already" : "not yet"));
932 * if the version is older, the value is already fixed
933 * and cannot be removed from the end set.
935 * If not, we will create a new block state for that block since
936 * we modify it by giving the end state a new version.
938 if (ver_is_younger(ver, ges->version)) {
939 block_state_t *bs = new_block_state(ges, bi);
940 workset_set_version(bs->end_state, index, ges->version);
948 * Now we have two options:
949 * 1) Reload the value at the end of the block.
950 * Therefore, perhaps, we have to erase another one from the workset.
951 * This may only be done if it has not been fixed.
952 * Since fixed means that a previous pass has decided that that value
953 * *has* to stay in the end set.
954 * 2) we can try, if the capacity of the block allows it, to let
955 * the value live through the block and make it available at
958 * First, we test the local (reload in this block) alternative
959 * and compare against the other alternative.
960 * Of course, we chose the cheaper one.
964 int n_regs = bi->free_at_jump;
965 int len = workset_get_length(end);
972 * look if there is room in the end array
973 * for the variable. Note that this does not
974 * mean that the var can live through the block.
975 * There is just room at the *end*
978 DBG((dbg, DBG_GLOBAL, "\t%2Dthe end set has %d free slots\n", level, n_regs - len));
/* End set full: search for a not-yet-fixed (younger-version) entry
 * that may be evicted to free a slot. */
981 for (i = 0; i < len; ++i) {
982 unsigned ver = workset_get_version(end, i);
983 if (ver_is_younger(ver, ges->version))
988 DBG((dbg, DBG_GLOBAL, "\t%2D%+F (slot %d) can be erased from the end set\n",
989 level, end->vals[i].irn, i));
995 * finally there is some room. we can at least reload the value.
996 * but we will try to let or live through anyhow.
999 irn_action_t *vs = new_irn_action(ges, irn, bi->bl);
1000 block_state_t *bs = new_block_state(ges, bi);
1001 workset_t *end = bs->end_state;
1002 ir_node *ins_before = block_info_get_last_ins(bi);
1003 double reload_here = be_get_reload_costs(bi->bel->senv, irn, ins_before);
1004 int pressure_ok = bs->pressure < n_regs;
1006 if (reload_here < bi->reload_cost)
1010 * No matter what we do, the value will be in the end set
1011 * if the block from now on (of course only regarding the
1012 * current state). Enter it and set the new length
1015 end->vals[slot].irn = irn;
1016 workset_set_version(end, slot, ges->version);
1017 workset_set_length(end, MAX(workset_get_length(end), slot + 1));
1019 vs->act = irn_act_reload;
1022 DBG((dbg, DBG_GLOBAL, "\t%2Dthere is a free slot. capacity=%d, reload here=%f, pressure %s\n",
1023 level, n_regs - bs->pressure, reload_here, pressure_ok ? "ok" : "insufficient"));
1026 /* look if we can bring the value in. */
1027 if (pressure_ok && reload_here > 0.0) {
1028 rollback_info_t rb = trans_begin(ges);
1029 double new_limit = MIN(reload_here, limit);
1031 vs->act = irn_act_live_through;
1033 res = can_bring_in(ges, bl, irn, new_limit, level + 1);
1036 * if bring in is too expensive re-adjust the pressure
1037 * and roll back the state
1039 if (res >= reload_here) {
1041 vs->act = irn_act_reload;
1042 trans_rollback(ges, &rb);
1048 DBG((dbg, DBG_GLOBAL, "\t%2D%s\n", level,
1049 vs->act == irn_act_reload ? "reloading" : "bringing in"));
1054 DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, res));
/* Estimate the cost of transporting @p irn into block @p bl from all its
 * predecessors by recursively making it available at each predecessor's
 * end; aborts (HUGE_VAL + rollback) once costs reach @p limit. */
1058 static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level)
1060 belady_env_t *env = ges->env;
1061 double glob_costs = HUGE_VAL;
1063 DBG((dbg, DBG_GLOBAL, "\t%2Dcan bring in (max %f) for %+F at block %+F\n", level, limit, irn, bl));
1065 if (is_transport_in(bl, irn)) {
1066 int i, n = get_irn_arity(bl);
1067 ir_node **nodes = alloca(get_irn_arity(bl) * sizeof(nodes[0]));
1068 rollback_info_t rb = trans_begin(ges);
1071 for (i = 0; i < n; ++i) {
1072 ir_node *pr = get_Block_cfgpred_block(bl, i);
/* For a local Phi, the value entering via pred i is the Phi operand i;
 * for an ordinary live-in it is the value itself. */
1073 ir_node *op = is_local_phi(bl, irn) ? get_irn_n(irn, i) : irn;
1077 * there might by Unknowns as operands of Phis in that case
1078 * we set the costs to zero, since they won't get spilled.
1080 if (arch_irn_consider_in_reg_alloc(env->cls, op))
1081 c = can_make_available_at_end(ges, pr, op, limit - glob_costs, level + 1);
1087 if (glob_costs >= limit) {
1088 glob_costs = HUGE_VAL;
1089 trans_rollback(ges, &rb);
1096 DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, glob_costs));
/* Apply the current transaction for real: perform all recorded node
 * actions (keep live-through values / insert end-of-block reloads) and
 * copy each touched block's tentative end state over the committed one. */
1100 static void materialize_and_commit_end_state(global_end_state_t *ges)
1102 belady_env_t *env = ges->env;
1106 DBG((dbg, DBG_GLOBAL, "\tmaterializing\n"));
1109 * Perform all the variable actions.
1111 for (ia = ges->ia_top; ia != NULL; ia = ia->next) {
1113 case irn_act_live_through:
1115 block_info_t *bi = get_block_info(ia->bl);
1118 if (is_local_phi(ia->bl, ia->irn)) {
1119 bitset_add_irn(ges->succ_phis, ia->irn);
1120 DBG((dbg, DBG_GLOBAL, "\t\tlive through phi kept alive: %+F\n", ia->irn));
/* A live-through value raises the section pressure of every bring-in
 * candidate in this block, and the block's front pressure. */
1123 list_for_each_entry_reverse(bring_in_t, iter, &bi->br_head, list)
1124 ++iter->sect_pressure;
1125 ++bi->front_pressure;
1128 case irn_act_reload:
1129 be_add_reload_at_end(env->senv, ia->irn, ia->bl, env->cls, 1);
1130 DBG((dbg, DBG_GLOBAL, "\t\tadding reload of %+F at end of %+F\n", ia->irn, ia->bl));
1133 DBG((dbg, DBG_GLOBAL, "\t\t%+F is in the end set of %+F\n", ia->irn, ia->bl));
1138 * Commit the block end states
1140 for (bs = ges->bs_top; bs != NULL; bs = bs->next) {
1141 block_info_t *bi = bs->bi;
/* Commit only the newest state per block: the committed bitset skips
 * the older stacked states further down the list. */
1143 if (!bitset_is_set(ges->committed, bi->id)) {
1144 DBG((dbg, DBG_GLOBAL, "\t\tcommiting workset of %+F with version %x\n", bi->bl, ges->version));
1145 // bes->bs->end_state->vals[idx].version = ges->version;
1146 workset_copy(env, bi->ws_end, bs->end_state);
1147 DBG((dbg, DBG_GLOBAL, "\t\told pressure: %d, new pressure: %d, end length: %d\n",
1148 bi->pressure, bs->pressure, workset_get_length(bs->end_state)));
1149 bi->pressure = bs->pressure;
1150 bitset_set(ges->committed, bi->id);
1154 /* clear the committed bitset. the next call is expecting it. */
1155 bitset_clear_all(ges->committed);
/* Decide whether spilling @p br->irn inside the bring-in block is cheaper
 * than spilling it at its definition(s); returns the node after which the
 * spill may be placed, or NULL if the definition site is cheaper. */
1158 static ir_node *better_spilled_here(const bring_in_t *br)
1160 const block_info_t *bi = br->bi;
1161 double spill_ef = get_block_info(get_nodes_block(br->irn))->exec_freq;
1164 * If the bring in node is a phi in the bring in block,
1165 * we look at all definitions and sum up their execution frequencies,
1166 * since spills will be placed there.
1167 * (except for the case where an operand is also a phi which is spilled :-( )
1168 * If that cost is higher than spilling the phi in that block, we opt for
1169 * bringing the phi into the block and spill it there.
1171 if (is_local_phi(bi->bl, br->irn)) {
1172 ir_node *bl = bi->bl;
1176 for (i = get_Block_n_cfgpreds(bl) - 1; i >= 0; --i)
1177 spill_ef += get_block_info(get_Block_cfgpred_block(bl, i))->exec_freq;
1180 return bi->exec_freq < spill_ef ? sched_prev(bi->first_non_in) : NULL;
/* Maximum section pressure seen from the start of the block up to (and
 * including) @p br, folded with the block's front pressure. */
1183 static int get_max_pressure_so_far(const block_info_t *bi, const bring_in_t *br)
1185 const struct list_head *l;
1188 assert(br->bi == bi);
1189 for (l = &br->list; l != &bi->br_head; l = l->prev) {
1190 br = list_entry(l, bring_in_t, list);
1191 res = MAX(res, br->sect_pressure);
1194 /* finally consider the front pressure distance and add the reference line (the global block pressure) */
1195 return MAX(res, bi->front_pressure);
/* The last bring-in candidate queued for the block. */
1198 #define block_last_bring_in(bi) list_entry((bi)->br_head.prev, bring_in_t, list)
/* Overall maximum pressure of the block: the pressure up to the last
 * bring-in, folded with the block-wide pressure. */
1201 static int get_block_max_pressure(const block_info_t *bi)
1203 int max = get_max_pressure_so_far(bi, block_last_bring_in(bi));
1204 return MAX(bi->pressure, max);
/**
 * Try to bring a variable into a block.
 *
 * Three outcomes are possible:
 *   1) the value is transported through the predecessors into a register
 *      ("bring in"),
 *   2) the use itself is blocked by pressure but the block front is not,
 *      so the value is brought to the block entry and spilled there, or
 *   3) nothing works and a plain reload is inserted before the first use.
 *
 * @param ges The state of all end sets.
 * @param br  The bring-in candidate (carries the block info, the variable
 *            and its first use inside the block).
 */
static void optimize_variable(global_end_state_t *ges, bring_in_t *br)
	block_info_t *bi = br->bi;
	ir_node *irn = br->irn;
	ir_node *bl = bi->bl;
	belady_env_t *env = ges->env;
	/* obstack mark: everything allocated during this attempt is freed at the end */
	void *reset_level = obstack_base(&ges->obst);
	int k = env->n_regs;
	int pressure_upto_use = get_max_pressure_so_far(bi, br);
	int front_pressure = bi->front_pressure;
	ir_node *better_spill_loc = NULL;

	assert(front_pressure <= k);
	assert(pressure_upto_use <= k);

	DBG((dbg, DBG_GLOBAL, "fixing %+F at %+F (%f), front pr: %d, pr to use: %d, first use: %x\n",
		irn, bl, bi->exec_freq, front_pressure, pressure_upto_use, br->first_use));

	// assert(!is_local_phi(bl, irn) || !bitset_contains_irn(ges->succ_phis, irn));

	/*
	 * if we cannot bring the value to the use, let's see if it would be worthwhile
	 * to bring the value to the beginning of the block to have a better spill
	 * location.
	 *
	 * better_spilled_here will return a node where the value can be spilled after
	 * or NULL if this block does not provide a better spill location.
	 */
	if (pressure_upto_use >= k && front_pressure < k && !bitset_contains_irn(env->spilled, irn))
		better_spill_loc = better_spilled_here(br);

	/*
	 * If either we can bring the value to the use or we should try
	 * to bring it here to do the spill here, let's try to bring it in.
	 */
	if (better_spill_loc || pressure_upto_use < k) {
		double bring_in_costs, local_costs;
		rollback_info_t trans;
		/* NOTE(review): the declarations of 'bs' and 'pressure_inc' (used
		 * below) are not visible in this chunk of the file -- confirm
		 * against the full source. */

		/* process all variables which shall be in a reg at
		 * the beginning of the block in the order of the next use. */
		local_costs = be_get_reload_costs(env->senv, irn, br->first_use);

		/* reset the lists */

		/* if the variable will live into this block, we must adapt the pressure.
		 * The new pressure is the MAX of:
		 * 1) the total block pressure
		 * 2) the pressure so far + the front pressure increase + 1
		 *
		 * If the second is larger than the first,
		 * we have to increment the total block pressure and hence
		 * save the old pressure to restore it in case of failing to
		 * bring the variable into the block in a register.
		 */
		trans = trans_begin(ges);
		bs = new_block_state(ges, bi);
		pressure_inc = MAX(bs->pressure, better_spill_loc ? front_pressure : pressure_upto_use + 1);
		bs->pressure = pressure_inc;

		assert(bi->pressure <= k);
		DBG((dbg, DBG_GLOBAL, "\ttrans in var %+F, version %x\n", irn, ges->version));
		/* tentatively propagate the value through the predecessors and
		 * compare the accumulated cost against a plain local reload */
		bring_in_costs = can_bring_in(ges, bl, irn, local_costs, 1);
		DBG((dbg, DBG_GLOBAL, "\tbring in: %f, local: %f\n", bring_in_costs, local_costs));

		/*
		 * Following cases can now occur:
		 * 1) There is room and costs ok
		 * 2) Cannot bring to use but can spill at begin and costs are ok
		 * 3) neither of both worked.
		 *
		 * following actions can be taken:
		 * b) mark phi as succeeded if node was phi
		 * c) insert reload at use location
		 * d) give a spill location hint
		 *
		 * this is the case/action matrix
		 */

		/* the costs were acceptable... */
		if (bring_in_costs < local_costs) {
			/*
			 * case 1 and first part of case 2:
			 * commit all the changes done. this manifests the bring-in action.
			 * if the transport-in was a phi (that is actually used in block)
			 * mark it in the succ_phis set to *not* phi spill it.
			 */
			materialize_and_commit_end_state(ges);
			if (is_local_phi(bl, irn))
				bitset_add_irn(ges->succ_phis, irn);

			/* NOTE(review): the format string below contains no conversion
			 * for the 'pressure_inc' argument and is missing a trailing
			 * '\n' -- the extra argument is ignored; fix candidate. */
			DBG((dbg, DBG_GLOBAL, "\t-> bring it in.", pressure_inc));

			/* second half of case 2: the use is still blocked, so reload
			 * locally before the use and spill at the better location */
			if (pressure_upto_use >= k) {
				DBG((dbg, DBG_GLOBAL, "\t-> use blocked. local reload: %+F, try spill at: %+F\n",
					br->first_use, better_spill_loc));
				be_add_reload(env->senv, irn, br->first_use, env->cls, 1);
				be_add_spill(env->senv, irn, better_spill_loc);
				ir_nodeset_insert(env->extra_spilled, irn);

			/*
			 * go from the last bring in use to the first and add all the variables
			 * which additionally live through the block to their pressure.
			 * at the point were the actually treated use is, we have to increase
			 * the pressure by one more as the brought in value starts to count.
			 * Finally, adjust the front pressure as well.
			 */
			list_for_each_entry_reverse(bring_in_t, iter, &bi->br_head, list) {
				pressure_inc += pressure_upto_use < k;
				iter->sect_pressure += pressure_inc;
				check = MAX(check, iter->sect_pressure);
				DBG((dbg, DBG_GLOBAL, "\tinc section pressure of %+F by %d to %d\n", iter->first_use, pressure_inc, iter->sect_pressure));
			bi->front_pressure += pressure_inc;
			assert(MAX(check, bi->front_pressure) <= bi->pressure);
			DBG((dbg, DBG_GLOBAL, "\t-> result: p: %d, fp: %d\n", bi->pressure, bi->front_pressure));

		/* case 3: nothing worked. insert normal reload and rollback. */
			DBG((dbg, DBG_GLOBAL, "\t-> bring in was too expensive. local reload: %+F\n", br->first_use));
			be_add_reload(env->senv, irn, br->first_use, env->cls, 1);
			bitset_add_irn(env->spilled, irn);
			trans_rollback(ges, &trans);

	/* there was no opportunity for optimization at all. reload and be sad ... */
		DBG((dbg, DBG_GLOBAL, "\t-> can\'t do anything but reload before %+F\n", br->first_use));
		be_add_reload(env->senv, irn, br->first_use, env->cls, 1);
		bitset_add_irn(env->spilled, irn);

	DBG((dbg, DBG_GLOBAL, "\n"));

	/* reset the obstack and create a new version. */
	obstack_free(&ges->obst, reset_level);
	ges->version = ver_make_newer(ges->version);
1375 static bring_in_t **determine_global_order(belady_env_t *env)
1381 for (i = env->n_blocks - 1; i >= 0; --i) {
1382 block_info_t *bi = get_block_info(env->blocks[i]);
1383 list_for_each_entry(bring_in_t, elm, &bi->br_head, list) {
1384 obstack_ptr_grow(&env->ob, elm);
1389 obstack_ptr_grow(&env->ob, NULL);
1390 res = obstack_finish(&env->ob);
1391 qsort(res, n, sizeof(res[0]), bring_in_cmp);
/**
 * The global pass: after the per-block belady runs, use the remaining
 * register capacity of the blocks to keep variables in registers across
 * block boundaries (see optimize_variable), then phi-spill all phis that
 * could not be kept.
 *
 * NOTE(review): several local declarations (loop indices 'i'/'j', 'br',
 * 'irn') and some closing braces are not visible in this chunk of the
 * file -- confirm against the full source.
 *
 * @param env The belady environment.
 */
static void global_assign(belady_env_t *env)
	ir_nodeset_iterator_t iter;
	global_end_state_t ges;

	/*
	 * sort the blocks according to execution frequency.
	 * That's not necessary for belady() but for the global pass later on.
	 */
	qsort(env->blocks, env->n_blocks, sizeof(env->blocks[0]), block_freq_dfs_gt);

	/* set up the global end state: fresh obstack, version counters and
	 * per-block state stacks */
	memset(&ges, 0, sizeof(ges));
	obstack_init(&ges.obst);
	ges.version = ver_make_newer(ver_oldest);
	ges.succ_phis = bitset_irg_obstack_alloc(&ges.obst, env->irg);
	ges.committed = bitset_obstack_alloc(&ges.obst, env->n_blocks);
	ges.bs_tops = obstack_alloc(&ges.obst, sizeof(ges.bs_tops[0]) * env->n_blocks);
	ges.bs_tops_vers = obstack_alloc(&ges.obst, sizeof(ges.bs_tops_vers[0]) * env->n_blocks);

	/* invalidate all state stack pointer versions */
	for (i = 0; i < env->n_blocks; ++i) {
		block_info_t *bi = get_block_info(env->blocks[i]);
		ges.bs_tops_vers[i] = ver_oldest;

		/* Set all block end sets entries to the youngest version */
		for (j = workset_get_length(bi->ws_end) - 1; j >= 0; --j)
			workset_set_version(bi->ws_end, j, ver_youngest);

	/* determine order and optimize them */
	for (br = determine_global_order(env); *br; ++br)
		optimize_variable(&ges, *br);

	/*
	 * Now we spill phis which cannot be kept since they were replaced
	 * by reloads at the block entrances.
	 */
	for (i = 0; i < env->n_blocks; ++i) {
		ir_node *bl = env->blocks[i];

		sched_foreach(bl, irn) {
			/* NOTE(review): a guard restricting this walk to the block's
			 * leading Phi nodes appears to be elided here -- be_spill_phi
			 * presumably only receives Phis; verify in the full file. */
			if (arch_irn_consider_in_reg_alloc(env->cls, irn)
				&& !bitset_contains_irn(ges.succ_phis, irn))
				be_spill_phi(env->senv, irn);

	/* check dominance for specially spilled nodes. */
	foreach_ir_nodeset (env->extra_spilled, irn, iter)
		make_spill_locations_dominate_irn(env->senv, irn);
/**
 * Block walker: append the visited block to env->ob, from which
 * be_spill_belady later builds the NULL-terminated env->blocks array.
 * NOTE(review): the body's braces and the presumable env->n_blocks
 * accounting are elided from this chunk -- confirm against the full file.
 *
 * @param bl   The visited block.
 * @param data The belady environment (belady_env_t *).
 */
static void collect_blocks(ir_node *bl, void *data)
	belady_env_t *env = data;

	obstack_ptr_grow(&env->ob, bl);
/**
 * Do spilling for a register class on a graph using the belady heuristic.
 * In the transformed graph, the register pressure never exceeds the number
 * of available registers.
 *
 * NOTE(review): local declarations (the belady_env_t 'env', 'n_regs',
 * 'i', 'irn'), the matching '#endif', the per-block belady() call inside
 * the pressure-fixing loop and the presumable global_pass_enabled guard
 * are not visible in this chunk -- confirm against the full source.
 *
 * @param birg The backend graph
 * @param cls The register class to spill
 */
void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls)
	ir_graph *irg = be_get_birg_irg(birg);

	/* some special classes contain only ignore regs, nothing to do then */
	n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);

	be_clear_links(irg);

	/* init belady env */
	obstack_init(&env.ob);
	env.arch = be_get_birg_arch_env(birg);
	env.lv = be_get_birg_liveness(birg);
	env.dfs = env.lv->dfs;
	env.n_regs = n_regs;
	env.ws = new_workset(&env, &env.ob);
	env.senv = be_new_spill_env(birg);
	env.ef = be_get_birg_exec_freq(birg);
	env.spilled = bitset_irg_obstack_alloc(&env.ob, irg);
	env.extra_spilled = ir_nodeset_new(64);

	/* collect all blocks into a NULL-terminated array */
	irg_block_walk_graph(irg, NULL, collect_blocks, &env);
	obstack_ptr_grow(&env.ob, NULL);
	env.blocks = obstack_finish(&env.ob);

	/* renumbering in the blocks gives nicer debug output as numbers are smaller. */
#ifdef DEBUG_libfirm
	for (i = 0; i < env.n_blocks; ++i)
		sched_renumber(env.blocks[i]);

	/* Fix high register pressure in blocks with belady algorithm */
	for (i = 0; i < env.n_blocks; ++i)

	global_assign(&env);

	/* check dominance for specially spilled nodes. */
	ir_nodeset_iterator_t iter;

	foreach_ir_nodeset (env.extra_spilled, irn, iter)
		make_spill_locations_dominate_irn(env.senv, irn);

	/* Insert spill/reload nodes into the graph and fix usages */
	be_insert_spills_reloads(env.senv);

	/* clean up */
	be_delete_spill_env(env.senv);
	ir_nodeset_del(env.extra_spilled);

	obstack_free(&env.ob, NULL);
/**
 * Module constructor: register the belady2 spiller, its command line
 * option table (be.spill.belady2.*) and its debug module.
 * NOTE(review): the initializer fields of 'belady_spiller' (presumably
 * the be_spill_belady callback) are elided from this chunk.
 */
void be_init_spillbelady2(void)
	lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
	lc_opt_entry_t *spill_grp = lc_opt_get_grp(be_grp, "spill");
	lc_opt_entry_t *bel2_grp = lc_opt_get_grp(spill_grp, "belady2");

	static be_spiller_t belady_spiller = {

	lc_opt_add_table(bel2_grp, options);
	be_register_spiller("belady2", &belady_spiller);
	FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady2");

BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady2);