/*
 * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @brief   Belady's spill algorithm, version 2.
 * @author  Sebastian Hack, Matthias Braun, Daniel Grund
 *
 * The main differences to the original Belady are:
 * - The workset is empty at the start of a block.
 *   There is no attempt to fill it with variables which
 *   are not used in the block.
 * - There is a global pass which tries to use the remaining
 *   capacity of the blocks to let global variables live through
 *   them.
 */
41 #include "irnodeset.h"
43 #include "irprintf_t.h"
49 #include "iredges_t.h"
50 #include "irphase_t.h"
62 #include "bechordal_t.h"
64 #include "beloopana.h"
67 #include "bespillutil.h"
70 #include "lc_opts_enum.h"
#define DBG_WORKSET 128
#define DBG_GLOBAL  256

#define ALREADY_SPILLED_FACTOR 2

#define LIVE_END   (DEAD - 1)
#define REMAT_DIST (DEAD - 2)

static int already_spilled_factor = 2;
static int remat_live_range_ext   = 1;
static int global_pass_enabled    = 1;
static const lc_opt_table_entry_t options[] = {
	LC_OPT_ENT_INT ("asf",    "already spilled factor",                               &already_spilled_factor),
	LC_OPT_ENT_BOOL("remat",  "rematerializable ops get infinitely long live ranges", &remat_live_range_ext),
	LC_OPT_ENT_BOOL("global", "enable/disable the global pass",                       &global_pass_enabled),
	LC_OPT_LAST
};
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

/**
 * An association between a node and a point in time.
 */
typedef struct loc_t {
	ir_node  *irn;  /**< A node. */
	unsigned  time; /**< A use time.
	                     In the global pass this is used
	                     as the version number and not as a time.
	                     Only to save space... */
} loc_t;

typedef struct workset_t {
	int   len;     /**< current length */
	loc_t vals[0]; /**< inlined array of the values/distances in this working set */
} workset_t;
typedef struct belady_env_t {
	struct obstack               ob;   /**< Obstack for all per-pass allocations. */
	ir_graph                    *irg;  /**< The graph under inspection. */
	const dfs_t                 *dfs;  /**< A depth-first search on the CFG. */
	be_lv_t                     *lv;   /**< Liveness information. */
	ir_exec_freq                *ef;   /**< Execution frequency information. */
	const arch_env_t            *arch;
	const arch_register_class_t *cls;

	ir_node      **blocks;        /**< Array of all blocks. */
	int            n_blocks;      /**< Number of blocks in the graph. */
	int            n_regs;        /**< number of regs in this reg-class */
	workset_t     *ws;            /**< the main workset used while processing a block; obstack-allocated */
	ir_node       *instr;         /**< current instruction */
	int            instr_nr;      /**< current instruction number (relative to block start) */

	spill_env_t   *senv;          /**< see bespill.h */
	bitset_t      *spilled;       /**< bitset to keep all the irns which have already been spilled. */
	ir_nodeset_t  *extra_spilled; /**< All nodes for which a special spill location has been computed. */
} belady_env_t;
static int loc_compare(const void *a, const void *b)
{
	const loc_t *p = (const loc_t*)a;
	const loc_t *q = (const loc_t*)b;
	return (p->time > q->time) - (p->time < q->time);
}
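/*
 * Note on the comparator above: the (a > b) - (a < b) idiom yields -1, 0 or 1
 * without the overflow/wraparound risk of returning a plain difference.
 * A standalone illustration (not libFirm code, names are made up):
 *
 *     #include <stdio.h>
 *
 *     static int cmp_time(unsigned a, unsigned b)
 *     {
 *         return (a > b) - (a < b);    // safe three-way compare
 *     }
 *
 *     int main(void)
 *     {
 *         // a naive (int)(a - b) would wrap around for these operands
 *         printf("%d\n", cmp_time(0u, 4294967295u));  // prints -1
 *         printf("%d\n", cmp_time(5u, 5u));           // prints 0
 *         return 0;
 *     }
 */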
static inline void workset_print(const workset_t *w)
{
	int i;

	for (i = 0; i < w->len; ++i) {
		ir_fprintf(stderr, "%+F %d\n", w->vals[i].irn, w->vals[i].time);
	}
}
/**
 * Allocate a new workset on obstack @p ob with room for env->n_regs entries.
 */
static inline workset_t *new_workset(belady_env_t *env, struct obstack *ob)
{
	return OALLOCFZ(ob, workset_t, vals, env->n_regs);
}

/**
 * Allocate a new instance on obstack and make it equal to @param ws.
 */
static inline workset_t *workset_clone(belady_env_t *env, struct obstack *ob, workset_t *ws)
{
	workset_t *res = OALLOCF(ob, workset_t, vals, env->n_regs);
	memcpy(res, ws, sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]));
	return res;
}

/**
 * Do NOT allocate anything. Make @param tgt equal to @param src.
 * Returns @param tgt for convenience.
 */
static inline workset_t *workset_copy(belady_env_t *env, workset_t *tgt, workset_t *src)
{
	size_t size = sizeof(*src) + (env->n_regs)*sizeof(src->vals[0]);
	memcpy(tgt, src, size);
	return tgt;
}

/**
 * Overwrites the current content array of @param workset with the
 * @param count locations given at memory @param locs.
 * Sets the length of @param workset to count.
 */
static inline void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs)
{
	workset->len = count;
	memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
}
/**
 * Inserts the value @p val into the workset, iff it is not
 * already contained. The workset must not be full.
 */
static inline void workset_insert(belady_env_t *env, workset_t *ws, ir_node *val)
{
	int i;

	/* check for current regclass */
	if (!arch_irn_consider_in_reg_alloc(env->cls, val)) {
		// DBG((dbg, DBG_WORKSET, "Skipped %+F\n", val));
		return;
	}

	/* check if val is already contained */
	for (i = 0; i < ws->len; ++i)
		if (ws->vals[i].irn == val)
			return;

	/* insert val */
	assert(ws->len < env->n_regs && "Workset already full!");
	ws->vals[ws->len++].irn = val;
}

/**
 * Removes all entries from this workset.
 */
static inline void workset_clear(workset_t *ws)
{
	ws->len = 0;
}

/**
 * Removes the value @p val from the workset if present.
 */
static inline void workset_remove(workset_t *ws, ir_node *val)
{
	int i;

	for (i = 0; i < ws->len; ++i) {
		if (ws->vals[i].irn == val) {
			ws->vals[i] = ws->vals[--ws->len];
			return;
		}
	}
}

static inline int workset_get_index(const workset_t *ws, const ir_node *val)
{
	int i;

	for (i = 0; i < ws->len; ++i) {
		if (ws->vals[i].irn == val)
			return i;
	}

	return -1;
}
/**
 * Iterates over all values in the working set.
 * @p ws The workset to iterate
 * @p v  A variable to put the current value in
 * @p i  An integer for internal use
 */
#define workset_foreach(ws, v, i) \
	for (i = 0; v = (i < ws->len) ? ws->vals[i].irn : NULL, i < ws->len; ++i)

#define workset_set_time(ws, i, t)     (ws)->vals[i].time = t
#define workset_get_time(ws, i)       ((ws)->vals[i].time)
#define workset_set_length(ws, length) (ws)->len = length
#define workset_get_length(ws)        ((ws)->len)
#define workset_get_val(ws, i)        ((ws)->vals[i].irn)
#define workset_sort(ws)              qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare)
#define workset_contains(ws, n)       (workset_get_index(ws, n) >= 0)
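/*
 * Illustrative sketch of the workset technique above (standalone C, not
 * libFirm; all names are made up): a struct with a trailing array is
 * allocated in one chunk, filled, and sorted by next-use time, which is
 * the pattern workset_insert()/workset_sort() implement on loc_t entries.
 *
 *     #include <stdio.h>
 *     #include <stdlib.h>
 *
 *     typedef struct { int val; unsigned time; } demo_loc_t;
 *     typedef struct { int len; demo_loc_t vals[]; } demo_ws_t;
 *
 *     static int demo_cmp(const void *a, const void *b)
 *     {
 *         const demo_loc_t *p = a, *q = b;
 *         return (p->time > q->time) - (p->time < q->time);
 *     }
 *
 *     int main(void)
 *     {
 *         int n_regs = 4;   // capacity of the working set
 *         demo_ws_t *ws = malloc(sizeof(*ws) + n_regs * sizeof(ws->vals[0]));
 *         ws->len = 0;
 *         ws->vals[ws->len++] = (demo_loc_t){ 1, 7 };
 *         ws->vals[ws->len++] = (demo_loc_t){ 2, 3 };
 *         qsort(ws->vals, ws->len, sizeof(ws->vals[0]), demo_cmp);
 *         printf("%d\n", ws->vals[0].val);   // 2: smallest next-use time first
 *         free(ws);
 *         return 0;
 *     }
 */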
typedef struct bring_in_t bring_in_t;

typedef struct block_info_t {
	belady_env_t *bel;        /**< The enclosing belady environment. */
	ir_node *bl;              /**< The block this info belongs to. */
	int id;                   /**< Index of this block in the block array. */
	ir_phase next_uses;       /**< Next-use information for this block. */
	workset_t *ws_end;        /**< The end set after the local belady pass. */
	double exec_freq;         /**< The execution frequency of this block. */

	double reload_cost;       /**< Cost of a reload in this block. */
	ir_node *first_non_in;    /**< First node in block which is not a phi. */
	ir_node *last_ins;        /**< The instruction before which end of
	                               block reloads will be inserted. */

	int pressure;             /**< The amount of registers which remain free
	                               in this block. This capacity can be used to let
	                               global variables, transported into other blocks,
	                               live through this block. */

	int front_pressure;       /**< The pressure right before the first
	                               real (non-phi) node. At the beginning
	                               of the global pass, this is 0. */
	struct list_head br_head; /**< List head for all bring_in variables. */
	int free_at_jump;         /**< Registers free at the jump. */
} block_info_t;
static inline block_info_t *new_block_info(belady_env_t *bel, int id)
{
	ir_node      *bl  = bel->blocks[id];
	block_info_t *res = OALLOCZ(&bel->ob, block_info_t);
	res->first_non_in = NULL;
	res->last_ins     = NULL;
	res->bel          = bel;
	res->bl           = bl;
	res->id           = id;
	res->exec_freq    = get_block_execfreq(bel->ef, bl);
	res->reload_cost  = bel->arch->reload_cost * res->exec_freq;
	res->free_at_jump = bel->n_regs;
	INIT_LIST_HEAD(&res->br_head);
	set_irn_link(bl, res);
	return res;
}

#define get_block_info(block)       ((block_info_t *)get_irn_link(block))
#define set_block_info(block, info) set_irn_link(block, info)

static inline ir_node *block_info_get_last_ins(block_info_t *bi)
{
	if (!bi->last_ins)
		bi->last_ins = be_get_end_of_block_insertion_point(bi->bl);
	return bi->last_ins;
}
typedef struct next_use_t {
	unsigned is_first_use : 1; /**< Indicates that this use is the first
	                                in the block. Needed to identify
	                                transport-in values for the global
	                                pass. */
	sched_timestep_t step;     /**< The time step of the use. */
	ir_node *irn;              /**< The using instruction. */
	struct next_use_t *next;   /**< The next use in this block
	                                or NULL. */
} next_use_t;

static void build_next_uses(block_info_t *bi)
{
	ir_node *irn;

	sched_renumber(bi->bl);

	phase_init(&bi->next_uses, bi->bel->irg, phase_irn_init_default);
	sched_foreach_reverse(bi->bl, irn) {
		int i;
		for (i = get_irn_arity(irn) - 1; i >= 0; --i) {
			ir_node *op = get_irn_n(irn, i);
			next_use_t *curr = (next_use_t*)phase_get_irn_data(&bi->next_uses, op);
			next_use_t *use  = (next_use_t*)phase_alloc(&bi->next_uses, sizeof(use[0]));

			use->is_first_use = 1;
			use->step         = sched_get_time_step(irn);
			use->irn          = irn;
			use->next         = curr;

			if (curr) {
				curr->is_first_use = 0;
				assert(curr->step >= use->step);
			}

			phase_set_irn_data(&bi->next_uses, op, use);
		}
	}
}
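/*
 * Illustrative sketch of the reverse-scan idea in build_next_uses() above
 * (standalone C, not libFirm; the "schedule" is just an array of value ids):
 * walking the schedule backwards and prepending a record per use leaves, for
 * every value, its uses chained in ascending time-step order.
 *
 *     #include <stdio.h>
 *
 *     #define N_VALS  3
 *     #define N_STEPS 4
 *
 *     typedef struct use_t { int step; struct use_t *next; } use_t;
 *
 *     int main(void)
 *     {
 *         int    used_val[N_STEPS] = { 0, 1, 0, 2 }; // value used at each step
 *         use_t  store[N_STEPS];
 *         use_t *next_use[N_VALS] = { NULL, NULL, NULL };
 *         int    step;
 *
 *         for (step = N_STEPS - 1; step >= 0; --step) {
 *             use_t *u = &store[step];
 *             u->step = step;
 *             u->next = next_use[used_val[step]]; // prepend, like use->next = curr
 *             next_use[used_val[step]] = u;
 *         }
 *
 *         // value 0 is used at steps 0 and 2, already in ascending order
 *         printf("%d %d\n", next_use[0]->step, next_use[0]->next->step);
 *         return 0;
 *     }
 */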
static inline next_use_t *get_current_use(block_info_t *bi, const ir_node *node)
{
	return (next_use_t*)phase_get_irn_data(&bi->next_uses, node);
}

static inline void advance_current_use(block_info_t *bi, const ir_node *irn)
{
	next_use_t *use = get_current_use(bi, irn);

	assert(use);
	phase_set_irn_data(&bi->next_uses, irn, use->next);
}
static __attribute__((unused)) int block_freq_gt(const void *a, const void *b)
{
	const ir_node * const *p = (const ir_node**)a;
	const ir_node * const *q = (const ir_node**)b;
	block_info_t *pi = get_block_info(*p);
	block_info_t *qi = get_block_info(*q);
	double diff = qi->exec_freq - pi->exec_freq;
	return (diff > 0) - (diff < 0);
}

static int block_freq_dfs_gt(const void *a, const void *b)
{
	const ir_node * const *p = (const ir_node**)a;
	const ir_node * const *q = (const ir_node**)b;
	block_info_t *pi = get_block_info(*p);
	block_info_t *qi = get_block_info(*q);
	double diff;

	if ((pi->exec_freq > 1.0 && qi->exec_freq > 1.0)
			|| (pi->exec_freq <= 1.0 && qi->exec_freq <= 1.0)) {
		const dfs_t *dfs = pi->bel->dfs;
		int pp = dfs_get_post_num(dfs, pi->bl);
		int pq = dfs_get_post_num(dfs, qi->bl);
		return pq - pp;
	}

	diff = qi->exec_freq - pi->exec_freq;
	return (diff > 0) - (diff < 0);
}
/*
 ____       _               ___
| __ ) _ __(_)_ __   __ _  |_ _|_ __
|  _ \| '__| | '_ \ / _` |  | || '_ \
| |_) | |  | | | | | (_| |  | || | | |
|____/|_|  |_|_| |_|\__, | |___|_| |_|
                    |___/

Data structures to represent bring in variables.
*/
struct bring_in_t {
	ir_node *irn;              /**< The node to bring in. */
	block_info_t *bi;          /**< The block to which the bring-in should happen. */
	int pressure_so_far;       /**< The maximal pressure till the first use of irn in bl. */
	ir_node *first_use;        /**< The first user of irn in bl. */
	sched_timestep_t use_step; /**< Schedule step of the first use. */

	int is_remat : 1;          /**< Is rematerializable. */
	int sect_pressure;         /**< Offset to maximum pressure in block. */
	struct list_head list;
	struct list_head sect_list;
	bring_in_t *sect_head;
};
static inline bring_in_t *new_bring_in(block_info_t *bi, ir_node *irn, const next_use_t *use)
{
	bring_in_t *br      = OALLOC(&bi->bel->ob, bring_in_t);
	br->irn             = irn;
	br->bi              = bi;
	br->first_use       = use->irn;
	br->use_step        = use->step;
	br->is_remat        = be_is_rematerializable(bi->bel->senv, irn, use->irn);
	br->pressure_so_far = bi->pressure;
	br->sect_pressure   = bi->front_pressure;
	br->sect_head       = br;

	INIT_LIST_HEAD(&br->list);
	INIT_LIST_HEAD(&br->sect_list);
	list_add_tail(&br->list, &bi->br_head);
	return br;
}
static int bring_in_cmp(const void *a, const void *b)
{
	const bring_in_t *p = *(const bring_in_t * const *) a;
	const bring_in_t *q = *(const bring_in_t * const *) b;
	double fp, fq;

	/* if one of the two is a remat node, it will be done after the other. */
	if (p->is_remat != q->is_remat)
		return p->is_remat - q->is_remat;

	/* in the same block, the one further to the front has to be processed first!
	 * Otherwise the front_pressure 'trick' is not exact. */
	if (p->bi == q->bi)
		return p->use_step - q->use_step;

	fp = p->bi->exec_freq;
	fq = q->bi->exec_freq;

	/* if both have the same frequency, inspect the frequency of the definition */
	if (fp == fq) {
		double fdp = get_block_info(get_nodes_block(p->irn))->exec_freq;
		double fdq = get_block_info(get_nodes_block(q->irn))->exec_freq;

		/* if the defs of both have the same freq, we go for reverse dfs post order. */
		if (fdp == fdq) {
			const dfs_t *dfs = p->bi->bel->dfs;
			int pp = dfs_get_post_num(dfs, p->bi->bl);
			int pq = dfs_get_post_num(dfs, q->bi->bl);
			return pq - pp;
		}

		return (fdq > fdp) - (fdq < fdp);
	}

	return (fq > fp) - (fq < fp);
}
static inline unsigned get_curr_distance(block_info_t *bi, const ir_node *irn, int is_usage)
{
	belady_env_t *env = bi->bel;
	sched_timestep_t curr_step = sched_get_time_step(env->instr);
	next_use_t *use = get_current_use(bi, irn);
	int flags = arch_irn_get_flags(irn);

	assert(!arch_irn_is_ignore(irn));

	/* We have to keep non-spillable nodes in the working set */
	if (flags & arch_irn_flags_dont_spill)
		return 0;

	/* at a definition, skip a use record for the current step */
	if (!is_usage && use && use->step == curr_step)
		use = use->next;

	if (use) {
		unsigned res = use->step - curr_step;

		assert(use->step >= curr_step);

		if (remat_live_range_ext && be_is_rematerializable(env->senv, irn, use->irn))
			res = REMAT_DIST;
		else if (bitset_contains_irn(env->spilled, irn))
			res *= already_spilled_factor;

		return res;
	}

	return be_is_live_end(env->lv, bi->bl, irn) ? LIVE_END : DEAD;
}
static inline int is_local_phi(const ir_node *bl, const ir_node *irn)
{
	return is_Phi(irn) && get_nodes_block(irn) == bl;
}

/**
 * Check if the value is something that is transported into a block.
 * That is, the value is defined elsewhere or defined by a Phi in the block.
 * @param bl  The block in question.
 * @param irn The node in question.
 * @return 1 if the node is something transported into @p bl, 0 if not.
 * @note The function will only give correct answers in the case
 *       where @p irn is unused in the block @p bl, which is always
 *       the case in our usage scenario.
 */
static inline int is_transport_in(const ir_node *bl, const ir_node *irn)
{
	return get_nodes_block(irn) != bl || is_Phi(irn);
}
/**
 * Performs the actions necessary to grant the request that:
 * - new_vals can be held in registers
 * - as few as possible other values are disposed
 * - the worst values get disposed
 *
 * @p is_usage indicates that the values in new_vals are used (not defined).
 * In this case, reloads must be performed.
 */
static void displace(block_info_t *bi, workset_t *new_vals, int is_usage)
{
	belady_env_t *env = bi->bel;
	workset_t    *ws  = env->ws;
	ir_node **to_insert = ALLOCAN(ir_node*, env->n_regs);

	ir_node *val;
	int i, len, max_allowed, demand, iter;

	/*
	 * 1. Identify the number of needed slots and the values to reload
	 */
	demand = 0;
	workset_foreach(new_vals, val, iter) {
		/* mark value as used */
		if (! workset_contains(ws, val)) {
			DBG((dbg, DBG_DECIDE, "\t\tinsert %+F\n", val));
			to_insert[demand++] = val;

			if (is_usage) {
				next_use_t *use = get_current_use(bi, val);

				/*
				 * if we use a value which is transported in this block, i.e. a
				 * phi defined here or a live in, for the first time, we check
				 * if there is room for that guy to survive from the block's
				 * entrance to here or not.
				 */
				assert(sched_get_time_step(env->instr) == (int) use->step);
				if (is_transport_in(bi->bl, val) && use->is_first_use) {
					bring_in_t *bri = new_bring_in(bi, val, use);
					bri->first_use = env->instr;

					/* reset the section pressure, since a new section starts. */
					bi->front_pressure = 0;

					DBG((dbg, DBG_DECIDE, "\t\tbring in node %+F, pressure %d:\n", val, bi->pressure));
					DBG((dbg, DBG_DECIDE, "\t\tno reload. must be considered at block start\n"));
				} else {
					bitset_add_irn(env->spilled, val);
					DBG((dbg, DBG_SPILL, "\t\tReload %+F before %+F\n", val, env->instr));
					be_add_reload(env->senv, val, env->instr, env->cls, 1);
				}
			}
		} else {
			assert(is_usage && "Defined value already in workset?!?");
			DBG((dbg, DBG_DECIDE, "\t\tskip %+F\n", val));
		}
	}
	DBG((dbg, DBG_DECIDE, "\t\tdemand = %d\n", demand));

	/*
	 * 2. Make room for at least 'demand' slots
	 */
	len         = workset_get_length(ws);
	max_allowed = env->n_regs - demand;

	/* Only make more free room if we do not have enough */
	if (len > max_allowed) {
		DBG((dbg, DBG_DECIDE, "\t\tdisposing %d values\n", len - max_allowed));

		/* get current next-use distance */
		for (i = 0; i < ws->len; ++i) {
			ir_node *val  = workset_get_val(ws, i);
			unsigned dist = get_curr_distance(bi, val, is_usage);
			workset_set_time(ws, i, dist);
		}

		/* sort entries by increasing next-use distance */
		workset_sort(ws);

		/* kill the last 'demand' entries in the array */
		workset_set_length(ws, max_allowed);
	}

	/*
	 * 3. Insert the new values into the workset
	 *    Also, we update the pressure in the block info.
	 *    That is important for the global pass to decide
	 *    how many values can live through the block.
	 */
	for (i = 0; i < demand; ++i)
		workset_insert(env, env->ws, to_insert[i]);

	/* TODO: simplify this expression? */
	bi->pressure       = MAX(bi->pressure, workset_get_length(env->ws));
	bi->front_pressure = MAX(bi->front_pressure, workset_get_length(env->ws));
}
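/*
 * Illustrative sketch of the eviction step in displace() above (standalone C
 * with made-up data, not libFirm): annotate every workset entry with its
 * next-use distance, sort ascending, then truncate to the allowed length, so
 * the values used farthest in the future are the ones dropped.
 *
 *     #include <stdio.h>
 *     #include <stdlib.h>
 *
 *     typedef struct { int val; unsigned dist; } entry_t;
 *
 *     static int by_dist(const void *a, const void *b)
 *     {
 *         const entry_t *p = a, *q = b;
 *         return (p->dist > q->dist) - (p->dist < q->dist);
 *     }
 *
 *     int main(void)
 *     {
 *         entry_t ws[] = { { 1, 9 }, { 2, 2 }, { 3, 5 } };
 *         int len = 3, max_allowed = 2, i;
 *
 *         qsort(ws, len, sizeof(ws[0]), by_dist);
 *         len = max_allowed;              // "kill" the farthest entries
 *         for (i = 0; i < len; ++i)
 *             printf("%d ", ws[i].val);   // prints: 2 3
 *         return 0;
 *     }
 */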
/**
 * For the given block @p block, decide for each value
 * whether it is used from a register or is reloaded.
 */
static void belady(belady_env_t *env, int id)
{
	block_info_t *block_info = new_block_info(env, id);
	const ir_node *block     = block_info->bl;

	workset_t *new_vals;
	ir_node   *irn;
	int        i, arity, iter;

	DBG((dbg, DBG_WSETS, "Belady on %+F\n", block_info->bl));
	new_vals = new_workset(env, &env->ob);
	workset_clear(env->ws);

	/* build the next use information for this block. */
	build_next_uses(block_info);

	env->instr_nr = 0;
	block_info->first_non_in = NULL;

	/* process the block from start to end */
	sched_foreach(block, irn) {
		assert(workset_get_length(env->ws) <= env->n_regs && "Too many values in workset!");

		/* Projs are handled with the tuple value.
		 * Phis are not real instructions (see insert_starters()),
		 * so instr_nr does not increase */
		if (is_Proj(irn) || is_Phi(irn))
			continue;
		DBG((dbg, DBG_DECIDE, "\t%+F\n", irn));

		if (!block_info->first_non_in)
			block_info->first_non_in = irn;

		/* set instruction in the workset */
		env->instr = irn;

		/* allocate all values _used_ by this instruction */
		workset_clear(new_vals);
		for (i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
			workset_insert(env, new_vals, get_irn_n(irn, i));
		}
		DBG((dbg, DBG_DECIDE, "\t* uses\n"));
		displace(block_info, new_vals, 1);

		/*
		 * set all used variables to the next use in their next_use_t list
		 * Also, kill all dead variables from the workset. They are only
		 * augmenting the pressure. Note that a variable is dead
		 * if it has no further use in this block and is *not* live end.
		 */
		for (i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
			ir_node *op = get_irn_n(irn, i);
			next_use_t *use = get_current_use(block_info, op);

			assert(use);
			if (!use->next && !be_is_live_end(env->lv, block, op))
				workset_remove(env->ws, op);

			advance_current_use(block_info, op);
		}

		/* allocate all values _defined_ by this instruction */
		workset_clear(new_vals);
		if (get_irn_mode(irn) == mode_T) { /* special handling for Tuples and Projs */
			const ir_edge_t *edge;

			foreach_out_edge(irn, edge) {
				ir_node *proj = get_edge_src_irn(edge);
				workset_insert(env, new_vals, proj);
			}
		} else {
			workset_insert(env, new_vals, irn);
		}
		DBG((dbg, DBG_DECIDE, "\t* defs\n"));
		displace(block_info, new_vals, 0);

		if (is_op_forking(get_irn_op(env->instr))) {
			for (i = get_irn_arity(env->instr) - 1; i >= 0; --i) {
				ir_node *op = get_irn_n(env->instr, i);
				block_info->free_at_jump -= arch_irn_consider_in_reg_alloc(env->cls, op);
			}
		}

		env->instr_nr++;
	}

	phase_deinit(&block_info->next_uses);

	/* Remember end-workset for this block */
	block_info->ws_end = workset_clone(env, &env->ob, env->ws);
	DBG((dbg, DBG_WSETS, "End workset for %+F:\n", block));
	workset_foreach(block_info->ws_end, irn, iter)
		DBG((dbg, DBG_WSETS, "  %+F (%u)\n", irn, workset_get_time(block_info->ws_end, iter)));
	DBG((dbg, DBG_WSETS, "Max pressure in block: %d\n", block_info->pressure));

	/* now, initialize the front pressure to 0. */
	block_info->front_pressure = 0;
}
/*
 _____ _                _       _           _   ____            _
|_   _| |__   ___   __ _| | ___ | |__   __ _| | |  _ \ __ _ _ __| |_
  | | | '_ \ / _ \ / _` | |/ _ \| '_ \ / _` | | | |_) / _` | '__| __|
  | | | | | |  __/| (_| | | (_) | |_) | (_| | | |  __/ (_| | |  | |_
  |_| |_| |_|\___| \__, |_|\___/|_.__/ \__,_|_| |_|   \__,_|_|   \__|
                   |___/
*/
#define workset_set_version(ws, i, t) ((ws)->vals[(i)].time = (t))
#define workset_get_version(ws, i)    ((ws)->vals[(i)].time)

#define ver_oldest           (0)
#define ver_youngest         ((unsigned) -1)
#define ver_make_newer(v)    ((v) + 1)
#define ver_is_older(v, w)   ((v) < (w))
#define ver_is_younger(v, w) ((v) > (w))
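/*
 * Illustrative sketch of the version trick above (standalone C, not libFirm):
 * stamping each slot with a global version makes "invalidate everything" an
 * O(1) counter bump, because a slot is valid only if its stamp matches the
 * current version; older stamps are treated as empty.
 *
 *     #include <stdio.h>
 *
 *     #define N 4
 *
 *     int main(void)
 *     {
 *         unsigned version = 1;           // ver_make_newer(ver_oldest)
 *         unsigned slot_vers[N] = { 0 };  // all slots start out "older" = invalid
 *         int      slot_data[N];
 *
 *         slot_data[2] = 42;
 *         slot_vers[2] = version;         // mark slot 2 valid for this round
 *         printf("%d\n", slot_vers[2] < version ? -1 : slot_data[2]); // 42
 *
 *         version += 1;                   // invalidates every slot at once
 *         printf("%d\n", slot_vers[2] < version ? -1 : slot_data[2]); // -1
 *         return 0;
 *     }
 */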
typedef struct block_state_t {
	struct block_state_t *next;
	struct block_state_t *next_intern;
	block_info_t *bi;
	int pressure;
	workset_t *end_state;
} block_state_t;

typedef enum irn_act_t {
	irn_act_none = 0,
	irn_act_reload,
	irn_act_live_through
} irn_act_t;

typedef struct irn_action_t {
	struct irn_action_t *next;
	ir_node *irn;
	const ir_node *bl;
	irn_act_t act;
} irn_action_t;

typedef struct global_end_state_t {
	belady_env_t *env;      /**< The surrounding belady environment. */
	struct obstack obst;    /**< Obstack for the transient state of one round. */
	bitset_t *succ_phis;    /**< Phis which could be brought in and thus need not be spilled. */
	bitset_t *committed;    /**< Blocks whose end states have been committed. */
	unsigned version;       /**< The current version. */
	unsigned *bs_tops_vers;
	block_state_t **bs_tops;
	block_state_t *bs_top;
	irn_action_t *ia_top;
} global_end_state_t;

typedef struct rollback_info_t {
	void *obst_level;
	block_state_t *bs_top;
	irn_action_t *ia_top;
} rollback_info_t;
static inline block_state_t *get_block_state(global_end_state_t *ges, const block_info_t *bi)
{
	int id = bi->id;
	assert(!ver_is_younger(ges->bs_tops_vers[id], ges->version));
	return ver_is_older(ges->bs_tops_vers[id], ges->version) ? NULL : ges->bs_tops[bi->id];
}

static inline const workset_t *get_end_state(global_end_state_t *ges, block_info_t *bi)
{
	block_state_t *bs = get_block_state(ges, bi);
	return bs ? bs->end_state : bi->ws_end;
}
static block_state_t *new_block_state(global_end_state_t *ges, block_info_t *bi)
{
	block_state_t *bs = get_block_state(ges, bi);
	block_state_t *nw = OALLOC(&ges->obst, block_state_t);

	nw->next_intern = bs;
	nw->next        = ges->bs_top;
	nw->bi          = bi;

	if (bs) {
		nw->pressure  = bs->pressure;
		nw->end_state = workset_clone(ges->env, &ges->obst, bs->end_state);
	} else {
		nw->pressure  = bi->pressure;
		nw->end_state = workset_clone(ges->env, &ges->obst, bi->ws_end);
	}

	ges->bs_top               = nw;
	ges->bs_tops[bi->id]      = nw;
	ges->bs_tops_vers[bi->id] = ges->version;
	return nw;
}
static irn_action_t *new_irn_action(global_end_state_t *ges, ir_node *irn, const ir_node *bl)
{
	irn_action_t *ia = OALLOC(&ges->obst, irn_action_t);

	ia->irn  = irn;
	ia->bl   = bl;
	ia->act  = irn_act_none;
	ia->next = ges->ia_top;
	ges->ia_top = ia;
	return ia;
}

static inline rollback_info_t trans_begin(global_end_state_t *ges)
{
	rollback_info_t rb;
	rb.obst_level = obstack_base(&ges->obst);
	rb.bs_top     = ges->bs_top;
	rb.ia_top     = ges->ia_top;
	return rb;
}
static inline void trans_rollback(global_end_state_t *ges, rollback_info_t *rb)
{
	block_state_t *bs;

	/* unwind all the stacks indexed by the block number */
	for (bs = ges->bs_top; bs != rb->bs_top; bs = bs->next) {
		unsigned id = bs->bi->id;
		ges->bs_tops[id] = bs->next_intern;
	}

	ges->ia_top = rb->ia_top;
	ges->bs_top = rb->bs_top;
	obstack_free(&ges->obst, rb->obst_level);
}
static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level);

static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level)
{
	block_info_t *bi     = get_block_info(bl);
	const workset_t *end = get_end_state(ges, bi);
	double res           = HUGE_VAL;
	int index;

	DBG((dbg, DBG_GLOBAL, "\t%2Dcan make avail %+F at end of %+F\n", level, irn, bl));

	/*
	 * to make the value available at end,
	 * we have several cases here.
	 *
	 * - we already visited that block.
	 * - If the value is in the final end set, return 0.
	 *   somebody else already allocated it there.
	 * - If not and the final end set is already full,
	 *   we cannot make the value available at the end
	 *   of this block. return INFINITY.
	 * - Else (value not in final end set and there is room):
	 *   1) The value is in a register at the end of the local Belady pass.
	 *      Allocate a slot in the final end set and return 0.
	 *   2) The value is not in the Belady end set:
	 *      If the block's capacity is < k then check what it costs
	 *      to transport the value from upper blocks to this block.
	 *      Compare that against the reload cost in this block. If
	 *      cheaper, do the other thing. If not, reload it here.
	 */

	/* if the end set contains it already, it is in a reg and it costs nothing
	 * to load it to one. */
	index = workset_get_index(end, irn);
	if (index >= 0) {
		unsigned ver = workset_get_version(end, index);
		DBG((dbg, DBG_GLOBAL, "\t%2Dnode is in the end set and is %s fixed\n",
					level, ver_is_older(ver, ges->version) ? "already" : "not yet"));

		/*
		 * if the version is older, the value is already fixed
		 * and cannot be removed from the end set.
		 *
		 * If not, we will create a new block state for that block since
		 * we modify it by giving the end state a new version.
		 */
		if (ver_is_younger(ver, ges->version)) {
			block_state_t *bs = new_block_state(ges, bi);
			workset_set_version(bs->end_state, index, ges->version);
		}

		return 0.0;
	}

	/*
	 * Now we have two options:
	 * 1) Reload the value at the end of the block.
	 *    Therefore, perhaps, we have to erase another one from the workset.
	 *    This may only be done if it has not been fixed.
	 *    Since fixed means that a previous pass has decided that that value
	 *    *has* to stay in the end set.
	 * 2) we can try, if the capacity of the block allows it, to let
	 *    the value live through the block and make it available at
	 *    the entrance.
	 *
	 * First, we test the local (reload in this block) alternative
	 * and compare against the other alternative.
	 * Of course, we choose the cheaper one.
	 */

	{
		int n_regs = bi->free_at_jump;
		int len    = workset_get_length(end);
		int slot   = -1;
		int i;

		/*
		 * look if there is room in the end array
		 * for the variable. Note that this does not
		 * mean that the var can live through the block.
		 * There is just room at the *end*.
		 */
		if (len < n_regs) {
			DBG((dbg, DBG_GLOBAL, "\t%2Dthe end set has %d free slots\n", level, n_regs - len));
			slot = len;
		} else {
			for (i = 0; i < len; ++i) {
				unsigned ver = workset_get_version(end, i);
				if (ver_is_younger(ver, ges->version))
					break;
			}

			if (i < len) {
				DBG((dbg, DBG_GLOBAL, "\t%2D%+F (slot %d) can be erased from the end set\n",
							level, end->vals[i].irn, i));
				slot = i;
			}
		}

		/*
		 * finally there is some room. we can at least reload the value.
		 * but we will try to let it live through anyhow.
		 */
		if (slot >= 0) {
			irn_action_t *vs    = new_irn_action(ges, irn, bi->bl);
			block_state_t *bs   = new_block_state(ges, bi);
			workset_t *end      = bs->end_state;
			ir_node *ins_before = block_info_get_last_ins(bi);
			double reload_here  = be_get_reload_costs(bi->bel->senv, irn, ins_before);
			int pressure_ok     = bs->pressure < n_regs;

			if (reload_here < bi->reload_cost)
				reload_here = 0.0;

			/*
			 * No matter what we do, the value will be in the end set
			 * of the block from now on (of course only regarding the
			 * current state). Enter it and set the new length
			 * appropriately.
			 */
			end->vals[slot].irn = irn;
			workset_set_version(end, slot, ges->version);
			workset_set_length(end, MAX(workset_get_length(end), slot + 1));

			vs->act = irn_act_reload;
			res     = reload_here;

			DBG((dbg, DBG_GLOBAL, "\t%2Dthere is a free slot. capacity=%d, reload here=%f, pressure %s\n",
						level, n_regs - bs->pressure, reload_here, pressure_ok ? "ok" : "insufficient"));

			/* look if we can bring the value in. */
			if (pressure_ok && reload_here > 0.0) {
				rollback_info_t rb = trans_begin(ges);
				double new_limit   = MIN(reload_here, limit);

				vs->act = irn_act_live_through;
				bs->pressure += 1;
				res = can_bring_in(ges, bl, irn, new_limit, level + 1);

				/*
				 * if bring in is too expensive re-adjust the pressure
				 * and roll back the state
				 */
				if (res >= reload_here) {
					bs->pressure -= 1;
					res = reload_here;
					vs->act = irn_act_reload;
					trans_rollback(ges, &rb);
				}
			}

			DBG((dbg, DBG_GLOBAL, "\t%2D%s\n", level,
						vs->act == irn_act_reload ? "reloading" : "bringing in"));
		}
	}

	DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, res));
	return res;
}
static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level)
{
	belady_env_t *env = ges->env;
	double glob_costs = HUGE_VAL;

	DBG((dbg, DBG_GLOBAL, "\t%2Dcan bring in (max %f) for %+F at block %+F\n", level, limit, irn, bl));

	if (is_transport_in(bl, irn)) {
		int i, n = get_irn_arity(bl);
		rollback_info_t rb = trans_begin(ges);

		glob_costs = 0.0;
		for (i = 0; i < n; ++i) {
			ir_node *pr = get_Block_cfgpred_block(bl, i);
			ir_node *op = is_local_phi(bl, irn) ? get_irn_n(irn, i) : irn;
			double c    = 0.0;

			/*
			 * there might be Unknowns as operands of Phis; in that case
			 * we set the costs to zero, since they won't get spilled.
			 */
			if (arch_irn_consider_in_reg_alloc(env->cls, op))
				c = can_make_available_at_end(ges, pr, op, limit - glob_costs, level + 1);

			glob_costs += c;

			if (glob_costs >= limit) {
				glob_costs = HUGE_VAL;
				trans_rollback(ges, &rb);
				break;
			}
		}
	}

	DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, glob_costs));
	return glob_costs;
}
static void materialize_and_commit_end_state(global_end_state_t *ges)
{
	belady_env_t *env = ges->env;
	irn_action_t *ia;
	block_state_t *bs;

	DBG((dbg, DBG_GLOBAL, "\tmaterializing\n"));

	/*
	 * Perform all the variable actions.
	 */
	for (ia = ges->ia_top; ia != NULL; ia = ia->next) {
		switch (ia->act) {
		case irn_act_live_through:
			{
				block_info_t *bi = get_block_info(ia->bl);
				bring_in_t *iter;

				if (is_local_phi(ia->bl, ia->irn)) {
					bitset_add_irn(ges->succ_phis, ia->irn);
					DBG((dbg, DBG_GLOBAL, "\t\tlive through phi kept alive: %+F\n", ia->irn));
				}

				list_for_each_entry_reverse(bring_in_t, iter, &bi->br_head, list)
					++iter->sect_pressure;
				++bi->front_pressure;
			}
			break;
		case irn_act_reload:
			be_add_reload_at_end(env->senv, ia->irn, ia->bl, env->cls, 1);
			DBG((dbg, DBG_GLOBAL, "\t\tadding reload of %+F at end of %+F\n", ia->irn, ia->bl));
			break;
		default:
			DBG((dbg, DBG_GLOBAL, "\t\t%+F is in the end set of %+F\n", ia->irn, ia->bl));
		}
	}

	/*
	 * Commit the block end states
	 */
	for (bs = ges->bs_top; bs != NULL; bs = bs->next) {
		block_info_t *bi = bs->bi;

		if (!bitset_is_set(ges->committed, bi->id)) {
			DBG((dbg, DBG_GLOBAL, "\t\tcommitting workset of %+F with version %x\n", bi->bl, ges->version));
			// bes->bs->end_state->vals[idx].version = ges->version;
			workset_copy(env, bi->ws_end, bs->end_state);
			DBG((dbg, DBG_GLOBAL, "\t\told pressure: %d, new pressure: %d, end length: %d\n",
						bi->pressure, bs->pressure, workset_get_length(bs->end_state)));
			bi->pressure = bs->pressure;
			bitset_set(ges->committed, bi->id);
		}
	}

	/* clear the committed bitset. the next call is expecting it. */
	bitset_clear_all(ges->committed);
}
static ir_node *better_spilled_here(const bring_in_t *br)
{
	const block_info_t *bi = br->bi;
	double spill_ef        = get_block_info(get_nodes_block(br->irn))->exec_freq;

	/*
	 * If the bring in node is a phi in the bring in block,
	 * we look at all definitions and sum up their execution frequencies,
	 * since spills will be placed there.
	 * (except for the case where an operand is also a phi which is spilled :-( )
	 * If that cost is higher than spilling the phi in that block, we opt for
	 * bringing the phi into the block and spilling it there.
	 */
	if (is_local_phi(bi->bl, br->irn)) {
		ir_node *bl = bi->bl;
		int i;

		spill_ef = 0.0;
		for (i = get_Block_n_cfgpreds(bl) - 1; i >= 0; --i)
			spill_ef += get_block_info(get_Block_cfgpred_block(bl, i))->exec_freq;
	}

	return bi->exec_freq < spill_ef ? sched_prev(bi->first_non_in) : NULL;
}
static int get_max_pressure_so_far(const block_info_t *bi, const bring_in_t *br)
{
	const struct list_head *l;
	int res = INT_MIN;

	assert(br->bi == bi);
	for (l = &br->list; l != &bi->br_head; l = l->prev) {
		br  = list_entry(l, bring_in_t, list);
		res = MAX(res, br->sect_pressure);
	}

	/* finally consider the front pressure distance and add the reference line (the global block pressure) */
	return MAX(res, bi->front_pressure);
}

#define block_last_bring_in(bi) list_entry((bi)->br_head.prev, bring_in_t, list)

static int get_block_max_pressure(const block_info_t *bi)
{
	int max = get_max_pressure_so_far(bi, block_last_bring_in(bi));
	return MAX(bi->pressure, max);
}
/**
 * Try to bring a variable into a block.
 * @param ges The state of all end sets.
 * @param br  The bring-in to optimize (carries the block and the variable).
 */
static void optimize_variable(global_end_state_t *ges, bring_in_t *br)
{
	block_info_t *bi          = br->bi;
	ir_node *irn              = br->irn;
	ir_node *bl               = bi->bl;
	belady_env_t *env         = ges->env;
	void *reset_level         = obstack_base(&ges->obst);
	int k                     = env->n_regs;
	int pressure_upto_use     = get_max_pressure_so_far(bi, br);
	int front_pressure        = bi->front_pressure;
	ir_node *better_spill_loc = NULL;

	assert(front_pressure <= k);
	assert(pressure_upto_use <= k);

	DBG((dbg, DBG_GLOBAL, "fixing %+F at %+F (%f), front pr: %d, pr to use: %d, first use: %x\n",
				irn, bl, bi->exec_freq, front_pressure, pressure_upto_use, br->first_use));

	// assert(!is_local_phi(bl, irn) || !bitset_contains_irn(ges->succ_phis, irn));
	/*
	 * if we cannot bring the value to the use, let's see if it would be worthwhile
	 * to bring the value to the beginning of the block to have a better spill
	 * location.
	 *
	 * better_spilled_here will return a node after which the value can be spilled,
	 * or NULL if this block does not provide a better spill location.
	 */
	if (pressure_upto_use >= k && front_pressure < k && !bitset_contains_irn(env->spilled, irn))
		better_spill_loc = better_spilled_here(br);

	/*
	 * If either we can bring the value to the use or we should try
	 * to bring it here to do the spill here, let's try to bring it in.
	 */
	if (better_spill_loc || pressure_upto_use < k) {
		block_state_t *bs;
		double bring_in_costs, local_costs;
		rollback_info_t trans;
		int pressure_inc;

		/* process all variables which shall be in a reg at
		 * the beginning of the block in the order of the next use. */
		local_costs = be_get_reload_costs(env->senv, irn, br->first_use);

		/* reset the lists */
		ges->bs_top = NULL;
		ges->ia_top = NULL;

		/*
		 * if the variable will live into this block, we must adapt the pressure.
		 * The new pressure is the MAX of:
		 * 1) the total block pressure
		 * 2) the pressure so far + the front pressure increase + 1
		 *
		 * If the second is larger than the first,
		 * we have to increment the total block pressure and hence
		 * save the old pressure to restore it in case of failing to
		 * bring the variable into the block in a register.
		 */
		trans = trans_begin(ges);
		bs    = new_block_state(ges, bi);
		pressure_inc = MAX(bs->pressure, better_spill_loc ? front_pressure : pressure_upto_use + 1);
		bs->pressure = pressure_inc;

		assert(bi->pressure <= k);
		DBG((dbg, DBG_GLOBAL, "\ttrans in var %+F, version %x\n", irn, ges->version));
		bring_in_costs = can_bring_in(ges, bl, irn, local_costs, 1);
		DBG((dbg, DBG_GLOBAL, "\tbring in: %f, local: %f\n", bring_in_costs, local_costs));

		/*
		 * Following cases can now occur:
		 * 1) There is room and costs are ok
		 * 2) Cannot bring to use but can spill at begin and costs are ok
		 * 3) neither of the two worked.
		 *
		 * following actions can be taken:
		 * a) commit the changes
		 * b) mark phi as succeeded if node was phi
		 * c) insert reload at use location
		 * d) give a spill location hint
		 *
		 * this is the case/action matrix
		 */

		/* the costs were acceptable... */
		if (bring_in_costs < local_costs) {
			int check = 0;
			bring_in_t *iter;

			/*
			 * case 1 and first part of case 2:
			 * commit all the changes done. this manifests the bring-in action.
			 * if the transport-in was a phi (that is actually used in block)
			 * mark it in the succ_phis set to *not* phi spill it.
			 */
			materialize_and_commit_end_state(ges);
			if (is_local_phi(bl, irn))
				bitset_add_irn(ges->succ_phis, irn);

			DBG((dbg, DBG_GLOBAL, "\t-> bring it in.\n"));

			/* second half of case 2 */
			if (pressure_upto_use >= k) {
				DBG((dbg, DBG_GLOBAL, "\t-> use blocked. local reload: %+F, try spill at: %+F\n",
							br->first_use, better_spill_loc));
				be_add_reload(env->senv, irn, br->first_use, env->cls, 1);
				be_add_spill(env->senv, irn, better_spill_loc);
				ir_nodeset_insert(env->extra_spilled, irn);
			}

			/*
			 * go from the last bring-in use to the first and add all the variables
			 * which additionally live through the block to their pressure.
			 * at the point where the currently treated use is, we have to increase
			 * the pressure by one more as the brought-in value starts to count.
			 * Finally, adjust the front pressure as well.
			 */
			pressure_inc = 0;
			list_for_each_entry_reverse(bring_in_t, iter, &bi->br_head, list) {
				if (iter == br)
					pressure_inc += pressure_upto_use < k;
				iter->sect_pressure += pressure_inc;
				check = MAX(check, iter->sect_pressure);
				DBG((dbg, DBG_GLOBAL, "\tinc section pressure of %+F by %d to %d\n", iter->first_use, pressure_inc, iter->sect_pressure));
			}
			bi->front_pressure += pressure_inc;
			assert(MAX(check, bi->front_pressure) <= bi->pressure);
			DBG((dbg, DBG_GLOBAL, "\t-> result: p: %d, fp: %d\n", bi->pressure, bi->front_pressure));
		}

		/* case 3: nothing worked. insert normal reload and rollback. */
		else {
			DBG((dbg, DBG_GLOBAL, "\t-> bring in was too expensive. local reload: %+F\n", br->first_use));
			be_add_reload(env->senv, irn, br->first_use, env->cls, 1);
			bitset_add_irn(env->spilled, irn);
			trans_rollback(ges, &trans);
		}
	}

	/* there was no opportunity for optimization at all. reload and be sad ... */
	else {
		DBG((dbg, DBG_GLOBAL, "\t-> can't do anything but reload before %+F\n", br->first_use));
		be_add_reload(env->senv, irn, br->first_use, env->cls, 1);
		bitset_add_irn(env->spilled, irn);
	}

	DBG((dbg, DBG_GLOBAL, "\n"));

	/* reset the obstack and create a new version. */
	obstack_free(&ges->obst, reset_level);
	ges->version = ver_make_newer(ges->version);
}
static bring_in_t **determine_global_order(belady_env_t *env)
{
	bring_in_t **res;
	bring_in_t *elm;
	int i, n = 0;

	for (i = env->n_blocks - 1; i >= 0; --i) {
		block_info_t *bi = get_block_info(env->blocks[i]);
		list_for_each_entry(bring_in_t, elm, &bi->br_head, list) {
			obstack_ptr_grow(&env->ob, elm);
			n++;
		}
	}

	obstack_ptr_grow(&env->ob, NULL);
	res = (bring_in_t**)obstack_finish(&env->ob);
	qsort(res, n, sizeof(res[0]), bring_in_cmp);
	return res;
}
static void global_assign(belady_env_t *env)
{
	ir_nodeset_iterator_t iter;
	global_end_state_t ges;
	bring_in_t **br;
	ir_node *irn;
	int i, j;

	/*
	 * sort the blocks according to execution frequency.
	 * That's not necessary for belady() but for the global pass later on.
	 */
	qsort(env->blocks, env->n_blocks, sizeof(env->blocks[0]), block_freq_dfs_gt);

	memset(&ges, 0, sizeof(ges));
	obstack_init(&ges.obst);

	ges.env          = env;
	ges.version      = ver_make_newer(ver_oldest);
	ges.succ_phis    = bitset_irg_obstack_alloc(&ges.obst, env->irg);
	ges.committed    = bitset_obstack_alloc(&ges.obst, env->n_blocks);
	ges.bs_tops      = OALLOCN(&ges.obst, block_state_t*, env->n_blocks);
	ges.bs_tops_vers = OALLOCN(&ges.obst, unsigned, env->n_blocks);

	/* invalidate all state stack pointer versions */
	for (i = 0; i < env->n_blocks; ++i) {
		block_info_t *bi = get_block_info(env->blocks[i]);
		ges.bs_tops_vers[i] = ver_oldest;

		/* Set all block end set entries to the youngest version */
		for (j = workset_get_length(bi->ws_end) - 1; j >= 0; --j)
			workset_set_version(bi->ws_end, j, ver_youngest);
	}

	/* determine order and optimize them */
	for (br = determine_global_order(env); *br; ++br)
		optimize_variable(&ges, *br);

	/*
	 * Now we spill phis which cannot be kept since they were replaced
	 * by reloads at the block entrances.
	 */
	for (i = 0; i < env->n_blocks; ++i) {
		ir_node *bl = env->blocks[i];

		sched_foreach(bl, irn) {
			if (!is_Phi(irn))
				break;

			if (arch_irn_consider_in_reg_alloc(env->cls, irn)
					&& !bitset_contains_irn(ges.succ_phis, irn))
				be_spill_phi(env->senv, irn);
		}
	}

	/* check dominance for specially spilled nodes. */
	foreach_ir_nodeset (env->extra_spilled, irn, iter)
		make_spill_locations_dominate_irn(env->senv, irn);
}
static void collect_blocks(ir_node *bl, void *data)
{
	belady_env_t *env = (belady_env_t*)data;
	env->n_blocks++;
	obstack_ptr_grow(&env->ob, bl);
}

/**
 * Do spilling for a register class on a graph using the belady heuristic.
 * In the transformed graph, the register pressure never exceeds the number
 * of available registers.
 *
 * @param irg The graph
 * @param cls The register class to spill
 */
static void be_spill_belady(ir_graph *irg, const arch_register_class_t *cls)
{
	belady_env_t env;
	int i, n_regs;

	/* some special classes contain only ignore regs, nothing to do then */
	n_regs = be_get_n_allocatable_regs(irg, cls);
	if (n_regs == 0)
		return;

	be_clear_links(irg);

	/* init belady env */
	obstack_init(&env.ob);
	env.irg           = irg;
	env.arch          = be_get_irg_arch_env(irg);
	env.cls           = cls;
	env.lv            = be_get_irg_liveness(irg);
	env.dfs           = env.lv->dfs;
	env.n_regs        = n_regs;
	env.ws            = new_workset(&env, &env.ob);
	env.senv          = be_new_spill_env(irg);
	env.ef            = be_get_irg_exec_freq(irg);
	env.spilled       = bitset_irg_obstack_alloc(&env.ob, irg);
	env.extra_spilled = ir_nodeset_new(64);
	env.n_blocks      = 0;

	irg_block_walk_graph(irg, NULL, collect_blocks, &env);
	obstack_ptr_grow(&env.ob, NULL);
	env.blocks = (ir_node**)obstack_finish(&env.ob);

	/* renumbering in the blocks gives nicer debug output as numbers are smaller. */
#ifdef DEBUG_libfirm
	for (i = 0; i < env.n_blocks; ++i)
		sched_renumber(env.blocks[i]);
#endif

	/* Fix high register pressure in blocks with belady algorithm */
	for (i = 0; i < env.n_blocks; ++i)
		belady(&env, i);

	global_assign(&env);

	/* check dominance for specially spilled nodes. */
	{
		ir_nodeset_iterator_t iter;
		ir_node *irn;

		foreach_ir_nodeset (env.extra_spilled, irn, iter)
			make_spill_locations_dominate_irn(env.senv, irn);
	}

	/* Insert spill/reload nodes into the graph and fix usages */
	be_insert_spills_reloads(env.senv);

	/* clean up */
	be_delete_spill_env(env.senv);
	ir_nodeset_del(env.extra_spilled);

	obstack_free(&env.ob, NULL);
}
BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady2);
void be_init_spillbelady2(void)
{
	lc_opt_entry_t *be_grp    = lc_opt_get_grp(firm_opt_get_root(), "be");
	lc_opt_entry_t *spill_grp = lc_opt_get_grp(be_grp, "spill");
	lc_opt_entry_t *bel2_grp  = lc_opt_get_grp(spill_grp, "belady2");

	static be_spiller_t belady_spiller = {
		be_spill_belady
	};

	lc_opt_add_table(bel2_grp, options);
	be_register_spiller("belady2", &belady_spiller);
	FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady2");
}