2 * Copyright (C) 1995-2007 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * @brief       Belady's spill algorithm.
23 * @author Daniel Grund, Matthias Braun
32 #include "irprintf_t.h"
38 #include "iredges_t.h"
41 #include "irnodeset.h"
47 #include "bespillbelady.h"
49 #include "besched_t.h"
53 #include "bechordal_t.h"
54 #include "bespilloptions.h"
55 #include "beloopana.h"
/* additional debug-mask bit for workset dumps (used with the DBG() macro) */
#define DBG_WORKSET 128
/* module debug handle; only present in debug builds */
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
	/* Tracks on how many control-flow paths a value has already been
	 * reloaded.  NOTE(review): the enum's opening/closing lines are not
	 * visible in this view. */
	value_not_reloaded,       /* the value has not been reloaded */
	value_partially_reloaded, /* the value has been reloaded on some paths */
	value_reloaded            /* the value has been reloaded on all paths */
/**
 * An association between a node and a point in time.
 * NOTE(review): the ir_node pointer member and the closing of this struct
 * are not visible in this view.
 */
typedef struct loc_t {
	unsigned time;             /**< A use time (see beuses.h). */
	reloaded_state_t reloaded; /**< the value is a reloaded value */
/* A fixed-capacity set of values currently assumed to be in registers.
 * vals[0] is the pre-C99 zero-length-array idiom for trailing variable
 * length data; the set is always allocated with room for n_regs entries. */
typedef struct _workset_t {
	int len;       /**< current length */
	loc_t vals[0]; /**< inlined array of the values/distances in this working set */
/* Module-wide state for one run of the Belady spiller; initialized in
 * be_spill_belady().  NOTE(review): the declaration of n_regs, which is
 * used throughout this file, is not visible in this view. */
static struct obstack obst;              /* allocator for worksets / block infos */
static const arch_env_t *arch_env;       /* architecture environment */
static const arch_register_class_t *cls; /* register class being processed */
static const be_lv_t *lv;                /* liveness information */
static be_loopana_t *loop_ana;           /* loop register-pressure analysis */
static workset_t *ws;     /**< the main workset used while
                               processing a block. */
static be_uses_t *uses;   /**< env for the next-use magic */
static ir_node *instr;    /**< current instruction */
static unsigned instr_nr; /**< current instruction number
                               (relative to block start) */
static ir_nodeset_t used; /* values already seen at the current instruction */
static spill_env_t *senv; /**< see bespill.h */
static pdeq *worklist;    /* queue of blocks still to process */
/**
 * qsort() comparator for loc_t entries: orders by ascending use time.
 * NOTE(review): the declarations of p and q are on lines not visible in
 * this view -- presumably const loc_t* views of a and b; confirm.
 * NOTE(review): time is unsigned, so the subtraction can wrap for very
 * large distances (e.g. USES_INFINITY) -- verify the ordering is still
 * consistent in that case.
 */
static int loc_compare(const void *a, const void *b)
	return p->time - q->time;
/**
 * Debugging helper: prints every value of workset @p w together with its
 * recorded use time to stderr.
 */
void workset_print(const workset_t *w)
	for(i = 0; i < w->len; ++i) {
		ir_fprintf(stderr, "%+F %d\n", w->vals[i].node, w->vals[i].time);
/**
 * Allocates a new, zero-initialized workset on the module obstack,
 * sized for n_regs entries.
 */
static workset_t *new_workset(void)
	/* header plus n_regs inlined loc_t slots */
	size_t size = sizeof(*res) + n_regs * sizeof(res->vals[0]);

	res = obstack_alloc(&obst, size);
	memset(res, 0, size);
/**
 * Alloc a new instance on obstack and make it equal to @param workset
 * (full copy including all n_regs slots, not just the used ones).
 */
static workset_t *workset_clone(workset_t *workset)
	size_t size = sizeof(*res) + n_regs * sizeof(res->vals[0]);
	res = obstack_alloc(&obst, size);
	memcpy(res, workset, size);
/**
 * Copy workset @param src to @param tgt.
 * Both worksets must have been allocated with room for n_regs entries.
 */
static void workset_copy(workset_t *dest, const workset_t *src)
	size_t size = sizeof(*src) + n_regs * sizeof(src->vals[0]);
	memcpy(dest, src, size);
/**
 * Overwrites the current content array of @param ws with the
 * @param count locations given at memory @param locs.
 * Set the length of @param ws to count.
 */
static void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs)
	workset->len = count;
	memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
/**
 * Inserts the value @p val into the workset, iff it is not
 * already contained. The workset must not be full.
 * If @p val is already present, only its 'reloaded' flag is updated.
 */
static void workset_insert(workset_t *workset, ir_node *val, int reloaded)
	/* check for current regclass */
	assert(arch_irn_consider_in_reg_alloc(arch_env, cls, val));

	/* check if val is already contained */
	for (i = 0; i < workset->len; ++i) {
		loc = &workset->vals[i];
		if (loc->node == val) {
			loc->reloaded = reloaded;

	/* not contained yet: append at the end */
	assert(workset->len < n_regs && "Workset already full!");
	loc = &workset->vals[workset->len];
	loc->reloaded = reloaded;
	loc->time = 6666; /* undefined yet */
/**
 * Removes all entries from this workset
 */
static void workset_clear(workset_t *workset)
/**
 * Removes the value @p val from the workset if present.
 * The last entry is swapped into the freed slot, so any existing
 * sort order of the entries is not preserved.
 */
static INLINE void workset_remove(workset_t *workset, ir_node *val)
	for(i = 0; i < workset->len; ++i) {
		if (workset->vals[i].node == val) {
			workset->vals[i] = workset->vals[--workset->len];
/** Returns non-zero iff @p val is currently contained in workset @p ws. */
static INLINE int workset_contains(const workset_t *ws, const ir_node *val)
	for(i=0; i<ws->len; ++i) {
		if (ws->vals[i].node == val)
/**
 * Iterates over all values in the working set.
 * @p ws The workset to iterate
 * @p v A variable to put the current value in
 * @p i An integer for internal use
 */
#define workset_foreach(ws, v, i)	for(i=0; \
		v=(i < ws->len) ? ws->vals[i].node : NULL, i < ws->len; \
/* accessors for an entry's use time, the set length and the i-th value */
#define workset_set_time(ws, i, t) (ws)->vals[i].time=t
#define workset_get_time(ws, i) (ws)->vals[i].time
#define workset_set_length(ws, length) (ws)->len = length
#define workset_get_length(ws) ((ws)->len)
#define workset_get_val(ws, i) ((ws)->vals[i].node)
/* sorts entries by ascending next-use time via loc_compare() */
#define workset_sort(ws) qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare);
/**
 * Per-block bookkeeping for the spiller: the worksets that hold at the
 * start and at the end of the block (attached to the block via its link
 * field, see get/set_block_info below).
 */
typedef struct _block_info_t
	workset_t *start_workset; /**< values in registers at block entry */
	workset_t *end_workset;   /**< values in registers at block exit */
/** Allocates a zero-initialized block_info_t on the module obstack. */
static void *new_block_info(void)
	block_info_t *res = obstack_alloc(&obst, sizeof(res[0]));
	memset(res, 0, sizeof(res[0]));
/* a block's block_info_t is stored in its generic link field */
#define get_block_info(block) ((block_info_t *)get_irn_link(block))
#define set_block_info(block, info) set_irn_link(block, info)
/**
 * @return The distance to the next use or 0 if irn has dont_spill flag set
 *
 * NOTE(review): the declaration of 'use'/'time' and the final return are
 * on lines not visible in this view.
 */
static INLINE unsigned get_distance(ir_node *from, unsigned from_step,
                                    const ir_node *def, int skip_from_uses)
	int flags = arch_irn_get_flags(arch_env, def);

	/* ignore-flagged values must never reach the spiller */
	assert(! (flags & arch_irn_flags_ignore));

	use = be_get_next_use(uses, from, from_step, def, skip_from_uses);
	if(USES_IS_INFINITE(use.time))
		return USES_INFINITY;

	/* We have to keep nonspillable nodes in the workingset */
	if(flags & arch_irn_flags_dont_spill)

	/* NOTE(review): reload costs are folded into the distance here,
	 * biasing which values get evicted -- confirm intended direction
	 * against the full source */
	time += be_get_reload_costs_no_weight(senv, def, use.before) * 10;
/**
 * Performs the actions necessary to grant the request that:
 * - new_vals can be held in registers
 * - as few as possible other values are disposed
 * - the worst values get disposed
 *
 * @p is_usage indicates that the values in new_vals are used (not defined)
 * In this case reloads must be performed
 */
static void displace(workset_t *new_vals, int is_usage)
	int i, len, max_allowed, demand, iter;
	ir_node **to_insert = alloca(n_regs * sizeof(to_insert[0]));

	/* 1. Identify the number of needed slots and the values to reload */
	workset_foreach(new_vals, val, iter) {
		/* mark value as used */
		ir_nodeset_insert(&used, val);

		if (! workset_contains(ws, val)) {
			DBG((dbg, DBG_DECIDE, " insert %+F\n", val));
			to_insert[demand++] = val;
			/* a used value not in the workset must be reloaded first */
			DBG((dbg, DBG_SPILL, "Reload %+F before %+F\n", val, instr));
			be_add_reload(senv, val, instr, cls, 1);
			DBG((dbg, DBG_DECIDE, " %+F already in workset\n", val));

	/* 2. Make room for at least 'demand' slots */
	len = workset_get_length(ws);
	max_allowed = n_regs - demand;

	/* Only make more free room if we do not have enough */
	if (len > max_allowed) {
		DBG((dbg, DBG_DECIDE, " disposing %d values\n",
		     ws->len - max_allowed));

		/* get current next-use distance */
		for (i = 0; i < ws->len; ++i) {
			ir_node *val = workset_get_val(ws, i);
			unsigned dist = get_distance(instr, instr_nr, val, !is_usage);
			workset_set_time(ws, i, dist);

		/* sort entries by increasing nextuse-distance*/

		/* Logic for not needed live-ins: If a value is disposed
		 * before its first usage, remove it from start workset
		 * We don't do this for phis though */
		for (i = max_allowed; i < ws->len; ++i) {
			ir_node *node = ws->vals[i].node;

			DBG((dbg, DBG_DECIDE, " disposing node %+F (%u)\n", node,
			     workset_get_time(ws, i)));

			if(!USES_IS_INFINITE(ws->vals[i].time)
					&& !ws->vals[i].reloaded) {
				//be_add_spill(senv, node, instr);

			/* never used so far: drop it from the start workset too */
			if (! ir_nodeset_contains(&used, node)) {
				ir_node *curr_bb = get_nodes_block(instr);
				workset_t *ws_start = get_block_info(curr_bb)->start_workset;
				workset_remove(ws_start, node);

				DBG((dbg, DBG_DECIDE, " (and removing %+F from start workset)\n", node));

	/* kill the last 'demand' entries in the array */
	workset_set_length(ws, max_allowed);

	/* 3. Insert the new values into the workset */
	for (i = 0; i < demand; ++i)
		workset_insert(ws, to_insert[i], 1);
/** Decides whether a specific node should be in the start workset or not
 *
 * @param first  the first instruction of the block (uses are measured
 *               relative to it)
 * @param node   the node to test
 * @param loop   the loop of the node
 *
 * Returns a loc_t whose time is USES_INFINITY (don't take),
 * USES_PENDING (delayed decision) or the next-use time (take).
 */
static loc_t to_take_or_not_to_take(ir_node* first, ir_node *node,
	be_next_use_t next_use;

	loc.time = USES_INFINITY;
	//loc.reloaded = rand() % 2; /* provoke a bug... */

	/* values of other register classes never enter this workset */
	if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node)) {
		loc.time = USES_INFINITY;

	/* We have to keep nonspillable nodes in the workingset */
	if(arch_irn_get_flags(arch_env, node) & arch_irn_flags_dont_spill) {
		DBG((dbg, DBG_START, " %+F taken (dontspill node)\n", node, loc.time));

	next_use = be_get_next_use(uses, first, 0, node, 0);
	if(USES_IS_INFINITE(next_use.time)) {
		// the nodes marked as live in shouldn't be dead, so it must be a phi
		assert(is_Phi(node));
		loc.time = USES_INFINITY;
		DBG((dbg, DBG_START, " %+F not taken (dead)\n", node));
		/* a dead phi still needs its memory slot */
		be_spill_phi(senv, node);

	loc.time = next_use.time;
	/* used inside the current loop: take it; otherwise delay decision */
	if(next_use.outermost_loop >= get_loop_depth(loop)) {
		DBG((dbg, DBG_START, " %+F taken (%u, loop %d)\n", node, loc.time, next_use.outermost_loop));
		loc.time = USES_PENDING;
		DBG((dbg, DBG_START, " %+F delayed (outerloopdepth %d < loopdetph %d)\n", node, next_use.outermost_loop, get_loop_depth(loop)));
/**
 * Computes the start-workset for a block with multiple predecessors. We assume
 * that at least 1 of the predecessors is a back-edge which means we're at the
 * beginning of a loop. We try to reload as much values as possible now so they
 * don't get reloaded inside the loop.
 */
static void compute_live_ins(const ir_node *block)
	ir_loop *loop = get_irn_loop(block);
	int i, len, ws_count;
	int free_slots, free_pressure_slots;
	//int n_pred_worksets;
	//workset_t **pred_worksets;

	/* Collect all values living at start of block */
	starters = NEW_ARR_F(loc_t, 0);
	delayed = NEW_ARR_F(loc_t, 0);

	DBG((dbg, DBG_START, "Living at start of %+F:\n", block));
	first = sched_first(block);

	/* check all Phis first */
	sched_foreach(block, node) {
		loc = to_take_or_not_to_take(first, node, loop);

		if (! USES_IS_INFINITE(loc.time)) {
			if (USES_IS_PENDING(loc.time))
				ARR_APP1(loc_t, delayed, loc);
				ARR_APP1(loc_t, starters, loc);

	/* check all Live-Ins */
	be_lv_foreach(lv, block, be_lv_state_in, i) {
		ir_node *node = be_lv_get_irn(lv, block, i);

		loc = to_take_or_not_to_take(first, node, loop);

		if (! USES_IS_INFINITE(loc.time)) {
			if (USES_IS_PENDING(loc.time))
				ARR_APP1(loc_t, delayed, loc);
				ARR_APP1(loc_t, starters, loc);

	/* how many delayed values can we afford to keep live through the loop? */
	pressure = be_get_loop_pressure(loop_ana, cls, loop);
	assert(ARR_LEN(delayed) <= (signed)pressure);
	free_slots = n_regs - ARR_LEN(starters);
	free_pressure_slots = n_regs - (pressure - ARR_LEN(delayed));
	free_slots = MIN(free_slots, free_pressure_slots);

	/* so far we only put nodes into the starters list that are used inside
	 * the loop. If register pressure in the loop is low then we can take some
	 * values and let them live through the loop */
	qsort(delayed, ARR_LEN(delayed), sizeof(delayed[0]), loc_compare);
	for (i = 0; i < ARR_LEN(delayed) && i < free_slots; ++i) {
		DBG((dbg, DBG_START, " delayed %+F taken\n", delayed[i].node));
		ARR_APP1(loc_t, starters, delayed[i]);
		delayed[i].node = NULL;

	/* spill phis (the actual phis not just their values) that are in this block
	 * but not in the start workset */
	for (i = ARR_LEN(delayed) - 1; i >= 0; --i) {
		ir_node *node = delayed[i].node;
		if(node == NULL || !is_Phi(node) || get_nodes_block(node) != block)

		DBG((dbg, DBG_START, " spilling delayed phi %+F\n", node));
		be_spill_phi(senv, node);

	/* Sort start values by first use */
	qsort(starters, ARR_LEN(starters), sizeof(starters[0]), loc_compare);

	/* Copy the best ones from starters to start workset */
	ws_count = MIN(ARR_LEN(starters), n_regs);
	workset_bulk_fill(ws, ws_count, starters);

	/* spill phis (the actual phis not just their values) that are in this block
	 * but not in the start workset */
	len = ARR_LEN(starters);
	for (i = ws_count; i < len; ++i) {
		ir_node *node = starters[i].node;
		if (! is_Phi(node) || get_nodes_block(node) != block)

		DBG((dbg, DBG_START, " spilling phi %+F\n", node));
		be_spill_phi(senv, node);

	/* determine reloaded status of the values: If there's 1 pred block (which
	 * is no backedge) where the value is reloaded then we must set it to
	 * reloaded here. We place spills in all pred where the value was not yet
	 * reloaded to be sure we have a spill on each path */
	arity = get_irn_arity(block);
	pred_worksets = alloca(sizeof(pred_worksets[0]) * arity);
	for(i = 0; i < arity; ++i) {
		ir_node *pred_block = get_Block_cfgpred_block(block, i);
		block_info_t *pred_info = get_block_info(pred_block);
		/* predecessor not processed yet (backedge) -> no workset available */
		if(pred_info == NULL)

		pred_worksets[n_pred_worksets] = pred_info->end_workset;

	for(i = 0; i < ws_count; ++i) {
		loc_t *loc = &ws->vals[i];
		ir_node *value = loc->node;

		/* phis from this block aren't reloaded */
		if(get_nodes_block(value) == block) {
			assert(is_Phi(value));
			loc->reloaded = value_not_reloaded;

		/* was the value reloaded on any of the other inputs */
		arity = get_Block_n_cfgpreds(block);
		for(n = 0; n < n_pred_worksets; ++n) {
			workset_t *pred_workset = pred_worksets[n];
			int p_len = workset_get_length(pred_workset);

			for(p = 0; p < p_len; ++p) {
				loc_t *l = &pred_workset->vals[p];
				if(l->node == value) {
/**
 * For the given block @p block, decide for each values
 * whether it is used from a register or is reloaded
 * before the use.
 */
static void belady(ir_node *block)
	block_info_t *block_info;
	int has_backedges = 0;
	const ir_edge_t *edge;

	/* no need to process a block twice */
	if(get_block_info(block) != NULL) {

	/* check if all predecessor blocks are processed yet (though for backedges
	 * we have to make an exception as we can't process them first) */
	arity = get_Block_n_cfgpreds(block);
	for(i = 0; i < arity; ++i) {
		ir_node *pred_block = get_Block_cfgpred_block(block, i);
		block_info_t *pred_info = get_block_info(pred_block);

		if(pred_info == NULL) {
			/* process predecessor first (it will be in the queue already) */
			if(!is_backedge(block, i)) {

	(void) has_backedges;
	} else if(arity == 1) {
		/* single predecessor: simply continue with its end workset */
		ir_node *pred_block = get_Block_cfgpred_block(block, 0);
		block_info_t *pred_info = get_block_info(pred_block);

		assert(pred_info != NULL);
		workset_copy(ws, pred_info->end_workset);
		/* we need 2 heuristics here, for the case when all predecessor blocks
		 * are known and when some are backedges (and therefore can't be known
		 * yet) */
		compute_live_ins(block);

	DBG((dbg, DBG_DECIDE, "\n"));
	DBG((dbg, DBG_DECIDE, "Decide for %+F\n", block));

	block_info = new_block_info();
	set_block_info(block, block_info);

	DBG((dbg, DBG_WSETS, "Start workset for %+F:\n", block));
	workset_foreach(ws, irn, iter) {
		DBG((dbg, DBG_WSETS, " %+F (%u)\n", irn,
		     workset_get_time(ws, iter)));

	block_info->start_workset = workset_clone(ws);

	/* process the block from start to end */
	DBG((dbg, DBG_WSETS, "Processing...\n"));
	ir_nodeset_init(&used);
	/* TODO: this leaks (into the obstack)... */
	new_vals = new_workset();

	sched_foreach(block, irn) {
		assert(workset_get_length(ws) <= n_regs);

		/* Phis are no real instr (see insert_starters()) */

		DBG((dbg, DBG_DECIDE, " ...%+F\n", irn));

		/* set instruction in the workset */

		/* allocate all values _used_ by this instruction */
		workset_clear(new_vals);
		for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
			ir_node *in = get_irn_n(irn, i);
			if (!arch_irn_consider_in_reg_alloc(arch_env, cls, in))

			/* (note that reloaded_value is irrelevant here) */
			workset_insert(new_vals, in, 0);
		displace(new_vals, 1);

		/* allocate all values _defined_ by this instruction */
		workset_clear(new_vals);
		if (get_irn_mode(irn) == mode_T) {
			/* a mode_T node defines its values through Proj nodes */
			const ir_edge_t *edge;

			foreach_out_edge(irn, edge) {
				ir_node *proj = get_edge_src_irn(edge);
				if (!arch_irn_consider_in_reg_alloc(arch_env, cls, proj))
				workset_insert(new_vals, proj, 0);
			if (!arch_irn_consider_in_reg_alloc(arch_env, cls, irn))
			workset_insert(new_vals, irn, 0);
		displace(new_vals, 0);

	ir_nodeset_destroy(&used);

	/* Remember end-workset for this block */
	block_info->end_workset = workset_clone(ws);
	DBG((dbg, DBG_WSETS, "End workset for %+F:\n", block));
	workset_foreach(ws, irn, iter)
		DBG((dbg, DBG_WSETS, " %+F (%u)\n", irn,
		     workset_get_time(ws, iter)));

	/* add successor blocks into worklist */
	foreach_block_succ(block, edge) {
		ir_node *succ = get_edge_src_irn(edge);
		pdeq_putr(worklist, succ);
/**
 * 'decide' is block-local and makes assumptions
 * about the set of live-ins. Thus we must adapt the
 * live-outs to the live-ins at each block-border.
 *
 * Block walker: inserts spills on edges where a live-out value leaves the
 * register set, and reloads on edges where a start-workset value is not in
 * the predecessor's end workset.
 */
static void fix_block_borders(ir_node *block, void *data)
	workset_t *start_workset;

	DBG((dbg, DBG_FIX, "\n"));
	DBG((dbg, DBG_FIX, "Fixing %+F\n", block));

	start_workset = get_block_info(block)->start_workset;

	/* process all pred blocks */
	arity = get_irn_arity(block);
	for (i = 0; i < arity; ++i) {
		ir_node *pred = get_Block_cfgpred_block(block, i);
		workset_t *pred_end_workset = get_block_info(pred)->end_workset;

		DBG((dbg, DBG_FIX, " Pred %+F\n", pred));

		/* spill all values not used anymore */
		workset_foreach(pred_end_workset, node, iter) {
			workset_foreach(start_workset, n2, iter2) {

			/* note that we do not look at phi inputs, because the values
			 * will be either live-end and need no spill or
			 * they have other users in which must be somewhere else in the
			 * workset */
			if(!found && be_is_live_out(lv, pred, node)
					&& !pred_end_workset->vals[iter].reloaded) {
				ir_node *insert_point
					= be_get_end_of_block_insertion_point(pred);
				DBG((dbg, DBG_SPILL, "Spill %+F before %+F\n", node,
				be_add_spill(senv, node, insert_point);

		/* reload missing values in predecessors */
		workset_foreach(start_workset, node, iter) {
			/* if node is a phi of the current block we reload
			 * the corresponding argument, else node itself */
			if(is_Phi(node) && block == get_nodes_block(node)) {
				node = get_irn_n(node, i);

				/* we might have unknowns as argument for the phi */
				if(!arch_irn_consider_in_reg_alloc(arch_env, cls, node))

			/* check if node is in a register at end of pred */
			if(workset_contains(pred_end_workset, node))

			/* node is not in memory at the end of pred -> reload it */
			DBG((dbg, DBG_FIX, " reload %+F\n", node));
			DBG((dbg, DBG_SPILL, "Reload %+F before %+F,%d\n", node, block, i));
			be_add_reload_on_edge(senv, node, block, i, cls, 1);
/**
 * Main entry point of the Belady spiller: spills register class @p rcls
 * for the graph of @p birg.
 * NOTE(review): the assignment 'cls = rcls' is not visible in this view --
 * confirm it is present in the full source, as n_regs below reads cls.
 */
static void be_spill_belady(be_irg_t *birg, const arch_register_class_t *rcls)
	ir_graph *irg = be_get_birg_irg(birg);

	be_liveness_assure_sets(be_assure_liveness(birg));

	/* construct control flow loop tree */
	if(! (get_irg_loopinfo_state(irg) & loopinfo_cf_consistent)) {
		construct_cf_backedges(irg);

	/* init belady env */
	arch_env = birg->main_env->arch_env;
	lv = be_get_birg_liveness(birg);
	/* ignore-registers are not available for allocation */
	n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
	uses = be_begin_uses(irg, lv);
	loop_ana = be_new_loop_pressure(birg);
	senv = be_new_spill_env(birg);
	worklist = new_pdeq();

	pdeq_putr(worklist, get_irg_start_block(irg));

	while(!pdeq_empty(worklist)) {
		ir_node *block = pdeq_getl(worklist);

	/* end block might not be reachable in endless loops */
	belady(get_irg_end_block(irg));

	/* belady was block-local, fix the global flow by adding reloads on the
	 * edges */
	irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);

	/* Insert spill/reload nodes into the graph and fix usages */
	be_insert_spills_reloads(senv);

	/* clean up */
	be_delete_spill_env(senv);
	be_free_loop_pressure(loop_ana);
	obstack_free(&obst, NULL);
/** Registers the Belady spiller with the backend spiller framework. */
void be_init_spillbelady(void)
	static be_spiller_t belady_spiller = {
	be_register_spiller("belady", &belady_spiller);
	FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady");
/* run be_init_spillbelady() automatically at libFirm module init */
BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady);