4 * Copyright: (c) Universitaet Karlsruhe
5 * Licence: This file is protected by the GPL - GNU GENERAL PUBLIC LICENSE.
17 #include "iredges_t.h"
21 #include "bespillbelady.h"
23 #include "besched_t.h"
/* Classic min macro. Beware: each argument is evaluated twice, so do not
 * pass expressions with side effects. */
28 #define MIN(a,b) (((a)<(b))?(a):(b))
/* Debug level mask for this module; the commented-out expression lists the
 * categories that can be OR-ed in (decide/worksets/border-fix/spill). */
35 #define DEBUG_LVL SET_LEVEL_0 //(DBG_DECIDE | DBG_WSETS | DBG_FIX | DBG_SPILL)
/* Debug module handle; registered in be_spill_belady(). */
36 static firm_dbg_module_t *dbg = NULL;
/* Forward typedefs for the data structures of the Belady spiller (defined
 * below). */
38 typedef struct _workset_t workset_t;
39 typedef struct _block_info_t block_info_t;
40 typedef struct _reloader_t reloader_t;
41 typedef struct _spill_info_t spill_info_t;
42 typedef struct _belady_env_t belady_env_t;
46 int i; /**< used for iteration */
47 int len; /**< current length */
48 loc_t vals[1]; /**< inlined array of the values/distances in this working set */
/* Per-block result of the decide() pass: the worksets at block entry and
 * block exit. Attached to the block via set_irn_link(). */
51 struct _block_info_t {
52 workset_t *ws_start, *ws_end;
/* One entry per spilled value: the value itself plus a linked list of all
 * positions where it must be reloaded. */
60 struct _spill_info_t {
61 ir_node *spilled_node; /**< the value that was spilled */
62 reloader_t *reloaders; /**< linked list of reload positions */
/* Global environment of one be_spill_belady() run. */
65 struct _belady_env_t {
67 const be_node_factory_t *factory; /**< factory used to create Reload nodes */
68 const arch_env_t *arch; /**< architecture environment (reg-class queries) */
69 const arch_register_class_t *cls; /**< the register class being processed */
70 int n_regs; /**< number of regs in this reg-class */
72 workset_t *ws; /**< the main workset used while processing a block. ob-allocated */
73 be_uses_t *uses; /**< env for the next-use magic */
74 ir_node *instr; /**< current instruction */
75 unsigned instr_nr; /**< current instruction number (relative to block start) */
76 pset *used; /**< holds the values used (so far) in the current BB */
77 set *spills; /**< all spill_info_t's, which must be placed */
79 spill_env_t senv; /* see bespill.h */
80 pset *reloads; /**< all reload nodes placed */
/* Comparison callback for the bel->spills set: two spill_info_t entries are
 * equal iff they describe the same spilled node. Returns 0 on equality.
 * NOTE(review): 'size' is unused — presumably required by the new_set()
 * callback signature; confirm against the set API. */
83 static int set_cmp_spillinfo(const void *x, const void *y, size_t size) {
84 const spill_info_t *xx = x;
85 const spill_info_t *yy = y;
86 return ! (xx->spilled_node == yy->spilled_node);
91 * Alloc a new workset on obstack @p ob with maximum size @p max
93 static INLINE workset_t *new_workset(struct obstack *ob, belady_env_t *bel) {
/* Capacity is bel->n_regs entries: sizeof(*res) already includes one loc_t
 * (the vals[1] member), so only n_regs-1 further slots are added. */
95 size_t size = sizeof(*res) + (bel->n_regs-1)*sizeof(res->vals[0]);
96 res = obstack_alloc(ob, size);
/* Clone workset @p ws onto obstack @p ob (shallow copy of header and the
 * whole inlined vals[] array). */
102 static INLINE workset_t *workset_clone(struct obstack *ob, workset_t *ws) {
/* Size covers the full capacity (n_regs slots), not just ws->len, so the
 * clone can later grow to the same maximum. */
104 size_t size = sizeof(*res) + (ws->bel->n_regs-1)*sizeof(res->vals[0]);
105 res = obstack_alloc(ob, size);
106 memcpy(res, ws, size);
111 * Inserts the value @p val into the workset, iff it is not
112 * already contained. The workset must not be full.
114 static INLINE void workset_insert(workset_t *ws, ir_node *val) {
116 assert(ws->len < ws->bel->n_regs && "Workset already full!");
117 /* check for current regclass */
/* Values of a foreign register class are silently dropped. */
118 if (arch_get_irn_reg_class(ws->bel->arch, val, 0) != ws->bel->cls) {
119 DBG((dbg, 0, "Dropped %+F\n", val));
123 /* check if val is already contained */
/* Linear scan is fine: worksets hold at most n_regs entries. */
124 for(i=0; i<ws->len; ++i)
125 if (ws->vals[i].irn == val)
/* Not contained: append at the end. */
129 ws->vals[ws->len++].irn = val;
133 * Inserts all values in array @p vals of length @p cnt
134 * into the workset. There must be enough space for the
137 static INLINE void workset_bulk_insert(workset_t *ws, int cnt, ir_node **vals) {
139 assert(ws->len + cnt <= ws->bel->n_regs && "Workset does not have enough room!");
/* Same filtering as workset_insert(), but for a whole array at once. */
141 for(o=0; o<cnt; ++o) {
142 ir_node *val = vals[o];
143 DBG((dbg, DBG_TRACE, "Bulk insert %+F\n", val));
144 /* check for current regclass */
/* Skip values of a foreign register class. */
145 if (arch_get_irn_reg_class(ws->bel->arch, val, 0) != ws->bel->cls) {
146 DBG((dbg, DBG_TRACE, "Wrong reg class\n"));
150 /* check if val is already contained */
151 for(i=0; i<ws->len; ++i)
152 if (ws->vals[i].irn == val) {
153 DBG((dbg, DBG_TRACE, "Already contained\n"));
/* New value: append at the end of the workset. */
158 ws->vals[ws->len++].irn = val;
159 DBG((dbg, DBG_TRACE, "Inserted\n"));
/* Empty loop-continue target. */
162 /*epsilon statement :)*/;
167 * Overwrites the current contents of @p ws with the
168 * locations given in @p locs
/* NOTE: 'count' is both assigned to ws->len and used as the copy length;
 * 'ws' is expanded twice — avoid arguments with side effects. */
170 #define workset_bulk_fill(ws, count, locs) memcpy(&(ws)->vals[0], locs, ((ws)->len=count)*sizeof(locs[0]));
173 * Removes all entries from this workset
/* O(1) clear: only the length is reset; the vals[] slots keep stale data. */
175 #define workset_clear(ws) (ws)->len = 0;
178 * Removes the value @p val from the workset if present.
180 static INLINE void workset_remove(workset_t *ws, ir_node *val) {
182 for(i=0; i<ws->len; ++i)
183 if (ws->vals[i].irn == val) {
/* Unordered removal: overwrite the slot with the last entry and shrink.
 * Entry order in the workset is not preserved. */
184 ws->vals[i] = ws->vals[--ws->len];
/* Membership test: linear scan over the (at most n_regs) entries of @p ws
 * looking for @p val. */
189 static INLINE int workset_contains(const workset_t *ws, ir_node *val) {
191 for(i=0; i<ws->len; ++i)
192 if (ws->vals[i].irn == val)
/* Iterate over all values of @p ws, binding each to @p v. Uses the
 * workset's own cursor field ws->i, so two simultaneous foreach loops over
 * the SAME workset would clash. */
197 #define workset_foreach(ws, v) for(ws->i=0; \
198 v=(ws->i < ws->len) ? ws->vals[ws->i].irn : NULL, ws->i < ws->len; \
/* Accessors for the value/next-use-distance pairs of a workset. */
201 #define workset_set_time(ws, i, t) (ws)->vals[i].time=t
202 #define workset_set_length(ws, length) (ws)->len = length
203 #define workset_get_length(ws) ((ws)->len)
204 #define workset_get_val(ws, i) ((ws)->vals[i].irn)
/* Sort entries with loc_compare (used to order by next-use distance). */
205 #define workset_sort(ws) qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare);
209 * Collects all values live-in at block @p blk and all phi results in this block.
210 * Then it adds the best values (at most n_regs) to the ws.
212 static void build_start_set(belady_env_t *bel, ir_node *blk) {
213 workset_t *ws = bel->ws;
214 ir_node *irn, *first;
217 loc_t loc, *starters;
/* Next-use distances are measured from the first scheduled node on. */
223 first = sched_first(blk);
/* Collect Phi results of the current register class as candidates. */
225 sched_foreach(blk, irn)
226 if (is_Phi(irn) && arch_get_irn_reg_class(bel->arch, irn, 0) == bel->cls) {
228 loc.time = be_get_next_use(bel->uses, first, 0, irn, 1);
229 obstack_grow(&ob, &loc, sizeof(loc));
/* Collect live-in values of the current register class as candidates. */
234 live_foreach(blk, li)
235 if (live_is_in(li) && arch_get_irn_reg_class(bel->arch, li->irn, 0) == bel->cls) {
236 loc.irn = (ir_node *)li->irn;
237 loc.time = be_get_next_use(bel->uses, first, 0, li->irn, 1);
238 obstack_grow(&ob, &loc, sizeof(loc));
241 starters = obstack_finish(&ob);
243 /* sort all values */
244 qsort(starters, count, sizeof(starters[0]), loc_compare);
246 /* copy the best ones to the ws */
/* Keep at most n_regs candidates (those sorted to the front). */
247 count = MIN(count, ws->bel->n_regs);
248 workset_bulk_fill(ws, count, starters);
/* Release the temporary candidate array. */
250 obstack_free(&ob, NULL);
254 * Performs the actions neccessary to grant the request that:
255 * - new_vals can be held in registers
256 * - as few as possible other values are disposed
257 * - the worst values get disposed
259 * @p is_usage indicates that the values in new_vals are used (not defined)
260 * In this case reloads must be performed
262 static void displace(belady_env_t *bel, workset_t *new_vals, int is_usage) {
264 int i, len, max_allowed, demand;
265 workset_t *ws = bel->ws;
/* Stack-allocated scratch array; bounded by n_regs so alloca is small. */
266 ir_node **to_insert = alloca(bel->n_regs * sizeof(*to_insert));
269 * 1. Identify the number of needed slots and the values to reload
272 workset_foreach(new_vals, val) {
273 /* mark value as used */
275 pset_insert_ptr(bel->used, val);
/* Value not in a register: it needs a slot, and (if this is a use) a
 * reload at the current instruction. */
277 if (!workset_contains(ws, val)) {
278 DBG((dbg, DBG_DECIDE, " insert %+F\n", val));
279 to_insert[demand++] = val;
282 spill_info_t si, *found;
285 /* find the spill info or create it */
286 si.spilled_node = val;
288 found = set_insert(bel->spills, &si, sizeof(si), HASH_PTR(si.spilled_node));
290 /* insert the reloader into the linked list */
/* Record "reload val before bel->instr" by prepending to the list. */
291 rld = obstack_alloc(&bel->ob, sizeof(*rld));
292 rld->reloader = bel->instr;
293 rld->next = found->reloaders;
294 found->reloaders = rld;
297 DBG((dbg, DBG_DECIDE, " skip %+F\n", val));
299 DBG((dbg, DBG_DECIDE, " demand = %d\n", demand));
303 * 2. Make room for at least 'demand' slots
305 len = workset_get_length(ws);
306 max_allowed = bel->n_regs - demand;
308 /* Only make more free room if we do not have enough */
309 if (len > max_allowed) {
310 /* get current next-use distance */
311 for (i=0; i<ws->len; ++i)
312 workset_set_time(ws, i, be_get_next_use(bel->uses, bel->instr, bel->instr_nr, workset_get_val(ws, i), is_usage));
314 /* sort entries by increasing nextuse-distance*/
317 /* Logic for not needed live-ins: If a value is disposed
318 before its first usage, remove it from start workset */
/* Belady heuristic: the entries beyond max_allowed (farthest next use
 * after sorting) are the ones disposed. */
319 for (i=max_allowed; i<ws->len; ++i) {
320 ir_node *irn = ws->vals[i].irn;
/* Never used in this block so far: it was a useless live-in; also drop
 * it from the block's recorded start workset. */
321 if (!pset_find_ptr(bel->used, irn)) {
322 ir_node *curr_bb = get_nodes_block(bel->instr);
323 workset_t *ws_start = ((block_info_t *) get_irn_link(curr_bb))->ws_start;
324 workset_remove(ws_start, irn);
326 DBG((dbg, DBG_DECIDE, " dispose %+F dumb\n", irn));
328 DBG((dbg, DBG_DECIDE, " dispose %+F\n", irn));
331 /* kill the last 'demand' entries in the array */
332 workset_set_length(ws, max_allowed);
336 * 3. Insert the new values into the workset
338 workset_bulk_insert(bel->ws, demand, to_insert);
342 * For the given block @p blk, decide for each value
343 * whether it is used from a register or is reloaded
346 static void decide(ir_node *blk, void *env) {
347 belady_env_t *bel = env;
/* Allocate the per-block info and attach it to the block's link field. */
350 block_info_t *blk_info = obstack_alloc(&bel->ob, sizeof(*blk_info));
351 set_irn_link(blk, blk_info);
353 DBG((dbg, DBG_DECIDE, "\n"));
354 DBG((dbg, DBG_DECIDE, "Decide for %+F\n", blk));
356 /* build starting-workset for this block */
357 build_start_set(bel, blk);
/* Snapshot the start workset; displace() may later prune it again. */
358 blk_info->ws_start = workset_clone(&bel->ob, bel->ws);
359 DBG((dbg, DBG_WSETS, "Initial start workset for %+F:\n", blk));
360 workset_foreach(blk_info->ws_start, irn)
361 DBG((dbg, DBG_WSETS, " %+F\n", irn));
363 /* process the block from start to end */
364 DBG((dbg, DBG_WSETS, "Processing...\n"));
/* Tracks which values were actually used in this block so far. */
365 bel->used = pset_new_ptr(32);
367 new_vals = new_workset(&bel->ob, bel);
368 sched_foreach(blk, irn) {
370 DBG((dbg, DBG_WSETS, "Current workset for %+F:\n", blk));
371 workset_foreach(bel->ws, iii)
372 DBG((dbg, DBG_WSETS, " %+F\n", iii));
373 assert(workset_get_length(bel->ws) <= bel->n_regs && "Too much values in workset!");
375 DBG((dbg, DBG_DECIDE, " ...%+F\n", irn));
377 /* projs are handled with the tuple value.
378 * Phis are no real instr (see insert_starters)
379 * instr_nr does not increase */
380 if (is_Proj(irn) || is_Phi(irn))
383 /* set instruction in the workset */
386 /* allocate all values _used_ by this instruction */
387 workset_clear(new_vals);
/* get_irn_in() returns arity+1 entries (block first) — hence the +1.
 * NOTE(review): this also feeds the block node to the insert; it is
 * presumably filtered out by the reg-class check — confirm. */
388 workset_bulk_insert(new_vals, get_irn_arity(irn)+1, get_irn_in(irn));
389 displace(bel, new_vals, 1);
391 /* allocate all values _defined_ by this instruction */
392 workset_clear(new_vals);
393 if (get_irn_mode(irn) == mode_T) { /* special handling for tuples and projs */
/* A mode_T node defines its results via the Projs scheduled after it. */
395 for(proj=sched_next(irn); is_Proj(proj); proj=sched_next(proj))
396 workset_insert(new_vals, proj);
398 workset_insert(new_vals, irn);
400 displace(bel, new_vals, 0);
406 /* Remember end-workset for this block */
407 blk_info->ws_end = workset_clone(&bel->ob, bel->ws);
408 DBG((dbg, DBG_WSETS, "Start workset for %+F:\n", blk));
409 workset_foreach(blk_info->ws_start, irn)
410 DBG((dbg, DBG_WSETS, " %+F\n", irn));
411 DBG((dbg, DBG_WSETS, "End workset for %+F:\n", blk));
412 workset_foreach(blk_info->ws_end, irn)
413 DBG((dbg, DBG_WSETS, " %+F\n", irn));
417 * 'decide' is block-local and makes assumptions
418 * about the set of live-ins. Thus we must adapt the
419 * live-outs to the live-ins at each block-border.
421 static void fix_block_borders(ir_node *blk, void *env) {
422 belady_env_t *bel = env;
425 DBG((dbg, DBG_FIX, "\n"));
426 DBG((dbg, DBG_FIX, "Fixing %+F\n", blk));
/* Values expected in registers at the start of this block. */
428 workset_t *wsb = ((block_info_t *)get_irn_link(blk))->ws_start;
430 /* process all pred blocks */
431 for (i=0, max=get_irn_arity(blk); i<max; ++i) {
432 ir_node *irnb, *irnp, *pred = get_Block_cfgpred_block(blk, i);
/* Values still in registers at the end of the predecessor. */
433 workset_t *wsp = ((block_info_t *)get_irn_link(pred))->ws_end;
435 DBG((dbg, DBG_FIX, " Pred %+F\n", pred));
437 workset_foreach(wsb, irnb) {
438 spill_info_t si, *found;
441 /* if irnb is a phi of the current block we reload
442 * the corresponding argument, else irnb itself */
443 if(is_Phi(irnb) && blk == get_nodes_block(irnb))
444 irnb = get_irn_n(irnb, i);
446 /* check if irnb is in a register at end of pred */
447 workset_foreach(wsp, irnp)
451 /* irnb is in memory at the end of pred, so we have to reload it */
453 /* find the spill info or create it */
454 si.spilled_node = irnb;
456 found = set_insert(bel->spills, &si, sizeof(si), HASH_PTR(si.spilled_node));
458 /* insert the reloader into the linked list.
459 * the schedule position depends on the cf-situation of the block */
460 rld = obstack_alloc(&bel->ob, sizeof(*rld));
/* Single predecessor: reload at the start of this block (after the
 * Phis); multiple predecessors: reload in the predecessor block. */
461 rld->reloader = (max==1) ? sched_skip(sched_first(blk), 1, sched_skip_phi_predicator, NULL) : pred;
462 rld->next = found->reloaders;
463 found->reloaders = rld;
465 DBG((dbg, DBG_FIX, " reload %+F before %+F\n", irnb, rld->reloader));
/* Empty loop-continue target. */
468 /*epsilon statement :)*/;
/* Materialize the collected spill information: create Spill/Reload nodes,
 * schedule the reloads, and wire them up with SSA reconstruction. */
473 static void insert_spills_reloads(ir_graph *irg, belady_env_t *bel) {
479 /* get all special spilled phis */
/* A spilled Phi that is not in its block's start workset lives in memory;
 * record it as a "memory phi" for the spill environment. */
480 for(si = set_first(bel->spills); si; si = set_next(bel->spills)) {
481 irn = si->spilled_node;
483 ir_node *blk = get_nodes_block(irn);
484 workset_t *sws = ((block_info_t *)get_irn_link(blk))->ws_start;
485 if (!workset_contains(sws, irn))
486 pset_insert_ptr(bel->senv.mem_phis, irn);
490 /* process each spilled node */
491 for(si = set_first(bel->spills); si; si = set_next(bel->spills)) {
495 ir_mode *mode = get_irn_mode(si->spilled_node);
497 /* go through all reloads for this spill */
498 for(rld = si->reloaders; rld; rld = rld->next) {
499 /* the spill for this reloader */
500 ir_node *spill = be_spill_node(&bel->senv, si->spilled_node);
/* rld->reloader is either a node (reload before it) or a whole block
 * (see fix_block_borders); resolve the target block accordingly. */
503 ir_node *bl = is_Block(rld->reloader) ? rld->reloader : get_nodes_block(rld->reloader);
504 ir_node *reload = new_Reload(bel->factory, bel->cls, irg, bl, mode, spill);
505 DBG((dbg, DBG_SPILL, " RELOADER %+F Reload %+F of %+F\n", rld->reloader, reload, si->spilled_node));
506 pset_insert_ptr(bel->reloads, reload);
508 /* remember the reload */
509 obstack_ptr_grow(&ob, reload);
510 sched_add_before(rld->reloader, reload);
/* Every spill_info_t was created for at least one reload. */
514 assert(n_reloads > 0);
515 reloads = obstack_finish(&ob);
/* SSA reconstruction: reroute uses to the new reload copies. */
516 be_introduce_copies_ignore(bel->senv.session->dom_front, si->spilled_node, n_reloads, reloads, bel->senv.mem_phis);
517 obstack_free(&ob, reloads);
520 obstack_free(&ob, NULL);
522 be_remove_spilled_phis(&bel->senv);
526 * Removes all used reloads from bel->reloads.
527 * The remaining nodes in bel->reloads will be removed from the graph.
529 static void rescue_used_reloads(ir_node *irn, void *env) {
530 pset *rlds = ((belady_env_t *)env)->reloads;
/* Reached by the graph walk => the reload is used; take it out of the
 * deletion-candidate set. (The DBG text refers to removal from that set,
 * not from the graph.) */
531 if (pset_find_ptr(rlds, irn)) {
532 DBG((dbg, DBG_SPILL, "Removing %+F in %+F\n", irn, get_nodes_block(irn)));
533 pset_remove_ptr(rlds, irn);
/* Entry point: run the Belady-style spill heuristic for register class
 * @p cls on the irg of @p session. Walks all blocks with decide(), fixes
 * the worksets at block borders, then inserts spills/reloads and finally
 * deletes reloads that turned out to be unused. */
537 void be_spill_belady(const be_main_session_env_t *session, const arch_register_class_t *cls) {
540 dbg = firm_dbg_register("ir.be.spillbelady");
541 firm_dbg_set_mask(dbg, DEBUG_LVL);
543 /* init belady env */
/* Environment lives on the stack for the duration of this call. */
544 belady_env_t *bel = alloca(sizeof(*bel));
545 obstack_init(&bel->ob);
546 bel->senv.session = session;
547 bel->factory = session->main_env->node_factory;
548 bel->arch = session->main_env->arch_env;
550 bel->n_regs = arch_register_class_n_regs(cls);
551 bel->ws = new_workset(&bel->ob, bel);
552 bel->uses = be_begin_uses(session->irg, session->main_env->arch_env, cls);
553 bel->spills = new_set(set_cmp_spillinfo, 32);
554 bel->senv.spill_ctxs = new_set(be_set_cmp_spillctx, 32);
555 bel->senv.mem_phis = pset_new_ptr_default();
556 bel->reloads = pset_new_ptr_default();
/* Phase 1+2: per-block decisions, then border fix-up; phase 3: placement. */
559 irg_block_walk_graph(session->irg, decide, NULL, bel);
560 irg_block_walk_graph(session->irg, fix_block_borders, NULL, bel);
561 insert_spills_reloads(session->irg, bel);
563 /* find all unused reloads and remove them from the schedule */
564 irg_walk_graph(session->irg, rescue_used_reloads, NULL, bel);
/* Whatever is still in bel->reloads was never reached by the walk. */
565 for(irn = pset_first(bel->reloads); irn; irn = pset_next(bel->reloads))
/* Tear down all auxiliary data structures. */
569 del_pset(bel->reloads);
570 del_pset(bel->senv.mem_phis);
571 del_set(bel->senv.spill_ctxs);
572 del_set(bel->spills);
573 be_end_uses(bel->uses);
574 obstack_free(&bel->ob, NULL);