renamed SubSP to SubSPandCopy, add some constants
[libfirm] / ir / be / bespillbelady.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Beladys spillalgorithm.
23  * @author      Daniel Grund, Matthias Braun
24  * @date        20.09.2005
25  * @version     $Id$
26  */
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30
31 #include "obst.h"
32 #include "irprintf_t.h"
33 #include "irgraph.h"
34 #include "irnode.h"
35 #include "irmode.h"
36 #include "irgwalk.h"
37 #include "irloop.h"
38 #include "iredges_t.h"
39 #include "ircons_t.h"
40 #include "irprintf.h"
41 #include "irnodeset.h"
42 #include "xmalloc.h"
43 #include "pdeq.h"
44
45 #include "beutil.h"
46 #include "bearch_t.h"
47 #include "beuses.h"
48 #include "besched_t.h"
49 #include "beirgmod.h"
50 #include "belive_t.h"
51 #include "benode_t.h"
52 #include "bechordal_t.h"
53 #include "bespilloptions.h"
54 #include "beloopana.h"
55 #include "beirg_t.h"
56 #include "bespill.h"
57 #include "bemodule.h"
58
/* Debug mask bits. Each DB((dbg, MASK, ...)) call in this file passes one of
 * these as its second argument. The values are distinct powers of two, so they
 * can be combined into a bit mask. */
#define DBG_SPILL     1
#define DBG_WSETS     2
#define DBG_FIX       4
#define DBG_DECIDE    8
#define DBG_START    16
#define DBG_SLOTS    32
#define DBG_TRACE    64
#define DBG_WORKSET 128
/* debug module handle used by the DB() calls; only declared in debug builds */
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
68
/* factor to weight the different costs of reloading/rematerializing a node
   (see bespill.h be_get_reload_costs_no_weight).
   get_distance() subtracts (costs * RELOAD_COST_FACTOR) from the next-use
   time and asserts that this product stays below 1000. */
#define RELOAD_COST_FACTOR   10
72
/**
 * Reload state recorded for a value in a workset entry (see loc_t.reloaded).
 * value_not_reloaded is 0, so the state can be tested like a boolean.
 */
typedef enum {
        value_not_reloaded,       /* the value has not been reloaded */
        value_partially_reloaded, /* the value has been reloaded on some paths */
        value_reloaded            /* the value has been reloaded on all paths */
} reloaded_state_t;
78
/**
 * An association between a node and a point in time.
 * This is the element type of a workset and of the candidate arrays that are
 * sorted with loc_compare().
 */
typedef struct loc_t {
        ir_node          *node;     /**< the value this entry describes */
        unsigned          time;     /**< A use time (see beuses.h). */
        reloaded_state_t  reloaded; /**< the value is a reloaded value */
} loc_t;
87
88 typedef struct _workset_t {
89         int   len;          /**< current length */
90         loc_t vals[0];      /**< inlined array of the values/distances in this working set */
91 } workset_t;
92
/* State shared by all functions of this file (file-local globals). */
static struct obstack               obst;     /**< worksets and block infos are allocated here */
static const arch_env_t            *arch_env; /**< passed to the arch_irn_* queries */
static const arch_register_class_t *cls;      /**< register class passed to
                                                   arch_irn_consider_in_reg_alloc() */
static const be_lv_t               *lv;       /**< liveness info, queried for the live-ins of a block */
static be_loopana_t                *loop_ana; /**< queried with be_get_loop_pressure() */
static int                          n_regs;   /**< number of slots in a workset; presumably the
                                                   number of usable registers of cls (set outside
                                                   the code visible here) */
static workset_t                   *ws;     /**< the main workset used while
                                                     processing a block. */
static be_uses_t                   *uses;   /**< env for the next-use magic */
static ir_node                     *instr;  /**< current instruction */
static unsigned                     instr_nr; /**< current instruction number
                                                       (relative to block start) */
static ir_nodeset_t                 used;     /**< values displace() has seen as operands in
                                                   the block currently processed */
static spill_env_t                 *senv;   /**< see bespill.h */
static pdeq                        *worklist; /**< block queue; belady() appends the successors
                                                   of each processed block */
108
109 static int loc_compare(const void *a, const void *b)
110 {
111         const loc_t *p = a;
112         const loc_t *q = b;
113         return p->time - q->time;
114 }
115
116 void workset_print(const workset_t *w)
117 {
118         int i;
119
120         for(i = 0; i < w->len; ++i) {
121                 ir_fprintf(stderr, "%+F %d\n", w->vals[i].node, w->vals[i].time);
122         }
123 }
124
125 /**
126  * Alloc a new workset on obstack @p ob with maximum size @p max
127  */
128 static workset_t *new_workset(void)
129 {
130         workset_t *res;
131         size_t     size = sizeof(*res) + n_regs * sizeof(res->vals[0]);
132
133         res  = obstack_alloc(&obst, size);
134         memset(res, 0, size);
135         return res;
136 }
137
138 /**
139  * Alloc a new instance on obstack and make it equal to @param workset
140  */
141 static workset_t *workset_clone(workset_t *workset)
142 {
143         workset_t *res;
144         size_t size = sizeof(*res) + n_regs * sizeof(res->vals[0]);
145         res = obstack_alloc(&obst, size);
146         memcpy(res, workset, size);
147         return res;
148 }
149
150 /**
151  * Copy workset @param src to @param tgt
152  */
153 static void workset_copy(workset_t *dest, const workset_t *src)
154 {
155         size_t size = sizeof(*src) + n_regs * sizeof(src->vals[0]);
156         memcpy(dest, src, size);
157 }
158
159 /**
160  * Overwrites the current content array of @param ws with the
161  * @param count locations given at memory @param locs.
162  * Set the length of @param ws to count.
163  */
164 static void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs)
165 {
166         workset->len = count;
167         memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
168 }
169
170 /**
171  * Inserts the value @p val into the workset, iff it is not
172  * already contained. The workset must not be full.
173  */
174 static void workset_insert(workset_t *workset, ir_node *val, int reloaded)
175 {
176         loc_t *loc;
177         int    i;
178         /* check for current regclass */
179         assert(arch_irn_consider_in_reg_alloc(arch_env, cls, val));
180
181         /* check if val is already contained */
182         for (i = 0; i < workset->len; ++i) {
183                 loc = &workset->vals[i];
184                 if (loc->node == val) {
185                         if(!loc->reloaded) {
186                                 loc->reloaded = reloaded;
187                         }
188                         return;
189                 }
190         }
191
192         /* insert val */
193         assert(workset->len < n_regs && "Workset already full!");
194         loc           = &workset->vals[workset->len];
195         loc->node     = val;
196         loc->reloaded = reloaded;
197         loc->time     = 6666; /* undefined yet */
198         workset->len++;
199 }
200
201 /**
202  * Removes all entries from this workset
203  */
204 static void workset_clear(workset_t *workset)
205 {
206         workset->len = 0;
207 }
208
209 /**
210  * Removes the value @p val from the workset if present.
211  */
212 static INLINE void workset_remove(workset_t *workset, ir_node *val)
213 {
214         int i;
215         for(i = 0; i < workset->len; ++i) {
216                 if (workset->vals[i].node == val) {
217                         workset->vals[i] = workset->vals[--workset->len];
218                         return;
219                 }
220         }
221 }
222
223 static INLINE int workset_contains(const workset_t *ws, const ir_node *val)
224 {
225         int i;
226
227         for(i=0; i<ws->len; ++i) {
228                 if (ws->vals[i].node == val)
229                         return 1;
230         }
231
232         return 0;
233 }
234
/**
 * Iterates over all values in the working set.
 * @p ws The workset to iterate (pointer expression)
 * @p v  A variable to put the current value in; it is NULL after the loop
 *       has run to completion
 * @p i  An integer variable for internal use (index of the current entry)
 */
#define workset_foreach(ws, v, i) \
        for ((i) = 0; \
             (v) = ((i) < (ws)->len) ? (ws)->vals[i].node : NULL, (i) < (ws)->len; \
             ++(i))

/* Accessors for workset entries. All arguments are parenthesized so that
 * arbitrary expressions (e.g. &workset) can be passed. */
#define workset_set_time(ws, i, t)     ((ws)->vals[i].time = (t))
#define workset_get_time(ws, i)        ((ws)->vals[i].time)
#define workset_set_length(ws, length) ((ws)->len = (length))
#define workset_get_length(ws)         ((ws)->len)
#define workset_get_val(ws, i)         ((ws)->vals[i].node)
/* Sorts the entries by increasing time (see loc_compare()). The macro has no
 * trailing semicolon, the caller supplies it. */
#define workset_sort(ws) \
        qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare)
251
/**
 * Per-block result of the belady walk. It is attached to the block through
 * its link field (see get_block_info()/set_block_info()).
 */
typedef struct _block_info_t
{
        workset_t *start_workset;   /**< copy of the workset at block entry */
        workset_t *end_workset;     /**< copy of the workset after the last instruction */
} block_info_t;
257
258
259 static void *new_block_info(void)
260 {
261         block_info_t *res = obstack_alloc(&obst, sizeof(res[0]));
262         memset(res, 0, sizeof(res[0]));
263
264         return res;
265 }
266
/* The block info is kept in the link field of the block node. A block whose
 * info is NULL is treated as "not processed yet" by the code in this file. */
#define get_block_info(block)        ((block_info_t *)get_irn_link(block))
#define set_block_info(block, info)  set_irn_link(block, info)
269
270 /**
271  * @return The distance to the next use or 0 if irn has dont_spill flag set
272  */
273 static INLINE unsigned get_distance(ir_node *from, unsigned from_step,
274                                     const ir_node *def, int skip_from_uses)
275 {
276         be_next_use_t use;
277         int           flags = arch_irn_get_flags(arch_env, def);
278         unsigned      costs;
279         unsigned      time;
280
281         assert(! (flags & arch_irn_flags_ignore));
282
283         use = be_get_next_use(uses, from, from_step, def, skip_from_uses);
284         if(USES_IS_INFINITE(use.time))
285                 return USES_INFINITY;
286
287         /* We have to keep nonspillable nodes in the workingset */
288         if(flags & arch_irn_flags_dont_spill)
289                 return 0;
290
291         costs = be_get_reload_costs_no_weight(senv, def, use.before);
292         assert(costs * RELOAD_COST_FACTOR < 1000);
293         time  = use.time + 1000 - (costs * RELOAD_COST_FACTOR);
294
295         return time;
296 }
297
/**
 * Performs the actions necessary to grant the request that:
 * - new_vals can be held in registers
 * - as few as possible other values are disposed
 * - the worst values get disposed
 *
 * Operates on the global workset ws and the current instruction instr.
 *
 * @p new_vals the values that must be in the workset afterwards
 * @p is_usage indicates that the values in new_vals are used (not defined)
 * In this case reloads must be performed
 */
static void displace(workset_t *new_vals, int is_usage)
{
        /* scratch array on the stack; new_vals holds at most n_regs entries */
        ir_node **to_insert = alloca(n_regs * sizeof(to_insert[0]));
        ir_node  *val;
        int       i;
        int       len;
        int       spills_needed;
        int       demand;
        int       iter;

        /* 1. Identify the number of needed slots and the values to reload */
        demand = 0;
        workset_foreach(new_vals, val, iter) {
                /* mark value as used */
                if (is_usage)
                        ir_nodeset_insert(&used, val);

                if (! workset_contains(ws, val)) {
                        DB((dbg, DBG_DECIDE, "    insert %+F\n", val));
                        if (is_usage) {
                                /* a used value that is not in the workset has to be
                                 * reloaded before the current instruction */
                                DB((dbg, DBG_SPILL, "Reload %+F before %+F\n", val, instr));
                                be_add_reload(senv, val, instr, cls, 1);
                        }
                } else {
                        DB((dbg, DBG_DECIDE, "    %+F already in workset\n", val));
                        /* a value being defined must not be in the workset yet */
                        assert(is_usage);
                        /* remove the value from the current workset so it is not
                         * accidentally spilled; it is re-inserted in step 3 */
                        workset_remove(ws, val);
                }
                to_insert[demand++] = val;
        }

        /* 2. Make room for at least 'demand' slots */
        len           = workset_get_length(ws);
        spills_needed = len + demand - n_regs;
        assert(spills_needed <= len);

        /* Only make more free room if we do not have enough */
        if (spills_needed > 0) {
                ir_node   *curr_bb  = get_nodes_block(instr);
                workset_t *ws_start = get_block_info(curr_bb)->start_workset;

                DB((dbg, DBG_DECIDE, "    disposing %d values\n", spills_needed));

                /* calculate current next-use distance for live values */
                for (i = 0; i < len; ++i) {
                        ir_node  *val  = workset_get_val(ws, i);
                        unsigned  dist = get_distance(instr, instr_nr, val, !is_usage);
                        workset_set_time(ws, i, dist);
                }

                /* sort entries by increasing nextuse-distance*/
                workset_sort(ws);

                /* Logic for not needed live-ins: If a value is disposed
                 * before its first usage, remove it from start workset
                 * We don't do this for phis though     */
                for (i = len - spills_needed; i < len; ++i) {
                        ir_node *val = ws->vals[i].node;

                        DB((dbg, DBG_DECIDE, "    disposing node %+F (%u)\n", val,
                             workset_get_time(ws, i)));

                        /* currently a no-op: the spill placement is commented out */
                        if(!USES_IS_INFINITE(ws->vals[i].time)
                                        && !ws->vals[i].reloaded) {
                                //be_add_spill(senv, val, instr);
                        }

                        if (!is_Phi(val) && ! ir_nodeset_contains(&used, val)) {
                                workset_remove(ws_start, val);
                                DB((dbg, DBG_DECIDE, "    (and removing %+F from start workset)\n", val));
                        }
                }

                /* kill the last 'spills_needed' entries in the array (those
                 * with the largest next-use distance) */
                workset_set_length(ws, len - spills_needed);
        }

        /* 3. Insert the new values into the workset */
        /* NOTE(review): the values are inserted with reloaded = 1 regardless of
         * is_usage, i.e. also for freshly defined values - confirm intended */
        for (i = 0; i < demand; ++i) {
                ir_node *val = to_insert[i];

                workset_insert(ws, val, 1);
        }
}
393
394 /** Decides whether a specific node should be in the start workset or not
395  *
396  * @param env      belady environment
397  * @param first
398  * @param node     the node to test
399  * @param loop     the loop of the node
400  */
401 static loc_t to_take_or_not_to_take(ir_node* first, ir_node *node,
402                                     ir_loop *loop)
403 {
404         be_next_use_t next_use;
405         loc_t         loc;
406
407         loc.time     = USES_INFINITY;
408         loc.node     = node;
409         //loc.reloaded = rand() % 2; /* provoke a bug... */
410         loc.reloaded = 0;
411
412         if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node)) {
413                 loc.time = USES_INFINITY;
414                 return loc;
415         }
416
417         /* We have to keep nonspillable nodes in the workingset */
418         if(arch_irn_get_flags(arch_env, node) & arch_irn_flags_dont_spill) {
419                 loc.time = 0;
420                 DB((dbg, DBG_START, "    %+F taken (dontspill node)\n", node, loc.time));
421                 return loc;
422         }
423
424         next_use = be_get_next_use(uses, first, 0, node, 0);
425         if(USES_IS_INFINITE(next_use.time)) {
426                 // the nodes marked as live in shouldn't be dead, so it must be a phi
427                 assert(is_Phi(node));
428                 loc.time = USES_INFINITY;
429                 DB((dbg, DBG_START, "    %+F not taken (dead)\n", node));
430                 if(is_Phi(node)) {
431                         be_spill_phi(senv, node);
432                 }
433                 return loc;
434         }
435
436         loc.time = next_use.time;
437
438         if(next_use.outermost_loop >= get_loop_depth(loop)) {
439                 DB((dbg, DBG_START, "    %+F taken (%u, loop %d)\n", node, loc.time,
440                     next_use.outermost_loop));
441         } else {
442                 loc.time = USES_PENDING;
443                 DB((dbg, DBG_START, "    %+F delayed (outerdepth %d < loopdepth %d)\n",
444                     node, next_use.outermost_loop, get_loop_depth(loop)));
445         }
446         return loc;
447 }
448
/**
 * Computes the start-workset for a block with multiple predecessors. We assume
 * that at least 1 of the predecessors is a back-edge which means we're at the
 * beginning of a loop. We try to reload as many values as possible now so they
 * don't get reloaded inside the loop.
 *
 * The result is stored in the global workset ws. Phis of @p block that do not
 * make it into the start workset are spilled with be_spill_phi().
 *
 * @param block  the block to compute the start workset for
 */
static void compute_live_ins(const ir_node *block)
{
        ir_loop    *loop = get_irn_loop(block);
        ir_node    *first;
        ir_node    *node;
        loc_t       loc;
        loc_t      *starters;
        loc_t      *delayed;
        int         i, len, ws_count;
        int             free_slots, free_pressure_slots;
        unsigned    pressure;
        //int arity;
        //int         n_pred_worksets;
        //workset_t **pred_worksets;

        /* Collect all values living at start of block:
         * starters = candidates with a regular next-use time,
         * delayed  = candidates rated USES_PENDING */
        starters = NEW_ARR_F(loc_t, 0);
        delayed  = NEW_ARR_F(loc_t, 0);

        DB((dbg, DBG_START, "Living at start of %+F:\n", block));
        first = sched_first(block);

        /* check all Phis first (the loop stops at the first non-Phi node of
         * the schedule) */
        sched_foreach(block, node) {
                if (! is_Phi(node))
                        break;

                loc = to_take_or_not_to_take(first, node, loop);

                if (! USES_IS_INFINITE(loc.time)) {
                        if (USES_IS_PENDING(loc.time))
                                ARR_APP1(loc_t, delayed, loc);
                        else
                                ARR_APP1(loc_t, starters, loc);
                }
        }

        /* check all Live-Ins */
        be_lv_foreach(lv, block, be_lv_state_in, i) {
                ir_node *node = be_lv_get_irn(lv, block, i);

                loc = to_take_or_not_to_take(first, node, loop);

                if (! USES_IS_INFINITE(loc.time)) {
                        if (USES_IS_PENDING(loc.time))
                                ARR_APP1(loc_t, delayed, loc);
                        else
                                ARR_APP1(loc_t, starters, loc);
                }
        }

        /* free_slots is the smaller of: the slots left over by the starters,
         * and n_regs minus the loop pressure that is not caused by the delayed
         * values themselves */
        pressure            = be_get_loop_pressure(loop_ana, cls, loop);
        assert(ARR_LEN(delayed) <= (signed)pressure);
        free_slots          = n_regs - ARR_LEN(starters);
        free_pressure_slots = n_regs - (pressure - ARR_LEN(delayed));
        free_slots          = MIN(free_slots, free_pressure_slots);

        /* so far we only put nodes into the starters list that are used inside
         * the loop. If register pressure in the loop is low then we can take some
         * values and let them live through the loop */
        if (free_slots > 0) {
                qsort(delayed, ARR_LEN(delayed), sizeof(delayed[0]), loc_compare);

                /* note: only the first free_slots delayed candidates are looked
                 * at; a candidate rejected below is not replaced by a later one */
                for (i = 0; i < ARR_LEN(delayed) && i < free_slots; ++i) {
                        int    p, arity;
                        loc_t *loc = & delayed[i];

                        /* don't use values which are dead in a known predecessors
                         * to not induce unnecessary reloads */
                        arity = get_irn_arity(block);
                        for (p = 0; p < arity; ++p) {
                                ir_node      *pred_block = get_Block_cfgpred_block(block, p);
                                block_info_t *pred_info  = get_block_info(pred_block);

                                /* predecessor not processed yet, nothing known */
                                if (pred_info == NULL)
                                        continue;

                                if (!workset_contains(pred_info->end_workset, loc->node)) {
                                        DB((dbg, DBG_START,
                                            "    delayed %+F not live at pred %+F\n", loc->node,
                                            pred_block));
                                        goto skip_delayed;
                                }
                        }

                        DB((dbg, DBG_START, "    delayed %+F taken\n", loc->node));
                        ARR_APP1(loc_t, starters, *loc);
                        /* mark the entry as consumed for the phi-spill loop below */
                        loc->node = NULL;
                skip_delayed:
                        ;
                }
        }

        /* spill phis (the actual phis not just their values) that are in this block
         * but not in the start workset */
        for (i = ARR_LEN(delayed) - 1; i >= 0; --i) {
                ir_node *node = delayed[i].node;
                if(node == NULL || !is_Phi(node) || get_nodes_block(node) != block)
                        continue;

                DB((dbg, DBG_START, "    spilling delayed phi %+F\n", node));
                be_spill_phi(senv, node);
        }
        DEL_ARR_F(delayed);

        /* Sort start values by first use */
        qsort(starters, ARR_LEN(starters), sizeof(starters[0]), loc_compare);

        /* Copy the best ones from starters to start workset */
        ws_count = MIN(ARR_LEN(starters), n_regs);
        workset_clear(ws);
        workset_bulk_fill(ws, ws_count, starters);

        /* spill phis (the actual phis not just their values) that are in this block
         * but not in the start workset */
        len = ARR_LEN(starters);
        for (i = ws_count; i < len; ++i) {
                ir_node *node = starters[i].node;
                if (! is_Phi(node) || get_nodes_block(node) != block)
                        continue;

                DB((dbg, DBG_START, "    spilling phi %+F\n", node));
                be_spill_phi(senv, node);
        }

        DEL_ARR_F(starters);

        /* The following region is disabled. Its loop computes a local 'reloaded'
         * flag per value but never stores it back. */
#if 0
        /* determine reloaded status of the values: If there's 1 pred block (which
         * is no backedge) where the value is reloaded then we must set it to
         * reloaded here. We place spills in all pred where the value was not yet
         * reloaded to be sure we have a spill on each path */
        n_pred_worksets = 0;
        arity           = get_irn_arity(block);
        pred_worksets   = alloca(sizeof(pred_worksets[0]) * arity);
        for(i = 0; i < arity; ++i) {
                ir_node      *pred_block = get_Block_cfgpred_block(block, i);
                block_info_t *pred_info  = get_block_info(pred_block);
                if(pred_info == NULL)
                        continue;

                pred_worksets[n_pred_worksets] = pred_info->end_workset;
                ++n_pred_worksets;
        }

        for(i = 0; i < ws_count; ++i) {
                loc_t   *loc   = &ws->vals[i];
                ir_node *value = loc->node;
                int      reloaded;
                int      n;

                /* phis from this block aren't reloaded */
                if(get_nodes_block(value) == block) {
                        assert(is_Phi(value));
                        loc->reloaded = value_not_reloaded;
                        continue;
                }

                /* was the value reloaded on any of the other inputs */
                reloaded = 0;
                arity    = get_Block_n_cfgpreds(block);
                for(n = 0; n < n_pred_worksets; ++n) {
                        workset_t *pred_workset = pred_worksets[n];
                        int        p_len        = workset_get_length(pred_workset);
                        int        p;

                        for(p = 0; p < p_len; ++p) {
                                loc_t *l = &pred_workset->vals[p];
                                if(l->node == value) {
                                        if(l->reloaded) {
                                                reloaded = 1;
                                        }
                                        break;
                                }
                        }
                        if(p >= p_len) {
                                reloaded = 1;
                                break;
                        }
                }
        }
#endif
}
638
/**
 * For the given block @p block, decide for each value
 * whether it is used from a register or is reloaded
 * before the use.
 *
 * The block is skipped if it was processed before, or if a predecessor that
 * is not reached through a backedge has not been processed yet. Otherwise the
 * start workset is determined, every scheduled non-Phi instruction is run
 * through displace(), the start and end worksets are stored in the block info
 * and all successor blocks are appended to the worklist.
 */
static void belady(ir_node *block)
{
        workset_t       *new_vals;
        ir_node         *irn;
        int              iter;
        block_info_t    *block_info;
        int              i, arity;
        int              has_backedges = 0;
        //int              first         = 0;
        const ir_edge_t *edge;

        /* no need to process a block twice */
        if(get_block_info(block) != NULL) {
                return;
        }

        /* check if all predecessor blocks are processed yet (though for backedges
         * we have to make an exception as we can't process them first) */
        arity = get_Block_n_cfgpreds(block);
        for(i = 0; i < arity; ++i) {
                ir_node      *pred_block = get_Block_cfgpred_block(block, i);
                block_info_t *pred_info  = get_block_info(pred_block);

                if(pred_info == NULL) {
                        /* process predecessor first (it will be in the queue already) */
                        if(!is_backedge(block, i)) {
                                return;
                        }
                        has_backedges = 1;
                }
        }
        /* has_backedges is computed but not used otherwise */
        (void) has_backedges;
        /* determine the start workset (in the global ws) */
        if(arity == 0) {
                workset_clear(ws);
        } else if(arity == 1) {
                /* single predecessor: continue with its end workset */
                ir_node      *pred_block = get_Block_cfgpred_block(block, 0);
                block_info_t *pred_info  = get_block_info(pred_block);

                assert(pred_info != NULL);
                workset_copy(ws, pred_info->end_workset);
        } else {
                /* we need 2 heuristics here, for the case when all predecessor blocks
                 * are known and when some are backedges (and therefore can't be known
                 * yet) */
                compute_live_ins(block);
        }

        DB((dbg, DBG_DECIDE, "\n"));
        DB((dbg, DBG_DECIDE, "Decide for %+F\n", block));

        /* from here on the block counts as processed */
        block_info = new_block_info();
        set_block_info(block, block_info);

        DB((dbg, DBG_WSETS, "Start workset for %+F:\n", block));
        workset_foreach(ws, irn, iter) {
                DB((dbg, DBG_WSETS, "  %+F (%u)\n", irn,
                     workset_get_time(ws, iter)));
        }

        /* displace() may later remove unused live-ins from this copy */
        block_info->start_workset = workset_clone(ws);

        /* process the block from start to end */
        DB((dbg, DBG_WSETS, "Processing...\n"));
        ir_nodeset_init(&used);
        instr_nr = 0;
        /* TODO: this leaks (into the obstack)... */
        new_vals = new_workset();

        sched_foreach(block, irn) {
                int i, arity;
                assert(workset_get_length(ws) <= n_regs);

                /* Phis are no real instr (see insert_starters()) */
                if (is_Phi(irn)) {
                        continue;
                }
                DB((dbg, DBG_DECIDE, "  ...%+F\n", irn));

                /* set instruction in the workset */
                instr = irn;

                /* allocate all values _used_ by this instruction */
                workset_clear(new_vals);
                for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
                        ir_node *in = get_irn_n(irn, i);
                        if (!arch_irn_consider_in_reg_alloc(arch_env, cls, in))
                                continue;

                        /* (note that reloaded_value is irrelevant here) */
                        workset_insert(new_vals, in, 0);
                }
                displace(new_vals, 1);

                /* allocate all values _defined_ by this instruction */
                workset_clear(new_vals);
                if (get_irn_mode(irn) == mode_T) {
                        /* mode_T node: the defined values are the nodes at its
                         * out edges */
                        const ir_edge_t *edge;

                        foreach_out_edge(irn, edge) {
                                ir_node *proj = get_edge_src_irn(edge);
                                if (!arch_irn_consider_in_reg_alloc(arch_env, cls, proj))
                                        continue;
                                workset_insert(new_vals, proj, 0);
                        }
                } else {
                        /* NOTE(review): this continue leaves the instruction loop
                         * iteration, so it also skips displace(new_vals, 0) and
                         * instr_nr++ below - confirm that instr_nr is not meant
                         * to count such instructions */
                        if (!arch_irn_consider_in_reg_alloc(arch_env, cls, irn))
                                continue;
                        workset_insert(new_vals, irn, 0);
                }
                displace(new_vals, 0);

                instr_nr++;
        }
        ir_nodeset_destroy(&used);

        /* Remember end-workset for this block */
        block_info->end_workset = workset_clone(ws);
        DB((dbg, DBG_WSETS, "End workset for %+F:\n", block));
        workset_foreach(ws, irn, iter)
                DB((dbg, DBG_WSETS, "  %+F (%u)\n", irn,
                     workset_get_time(ws, iter)));

        /* add successor blocks into worklist */
        foreach_block_succ(block, edge) {
                ir_node *succ = get_edge_src_irn(edge);
                pdeq_putr(worklist, succ);
        }
}
772
773 /**
774  * 'decide' is block-local and makes assumptions
775  * about the set of live-ins. Thus we must adapt the
776  * live-outs to the live-ins at each block-border.
777  */
778 static void fix_block_borders(ir_node *block, void *data)
779 {
780         workset_t    *start_workset;
781         int           arity;
782         int           i;
783         int           iter;
784         (void) data;
785
786         DB((dbg, DBG_FIX, "\n"));
787         DB((dbg, DBG_FIX, "Fixing %+F\n", block));
788
789         start_workset = get_block_info(block)->start_workset;
790
791         /* process all pred blocks */
792         arity = get_irn_arity(block);
793         for (i = 0; i < arity; ++i) {
794                 ir_node   *pred = get_Block_cfgpred_block(block, i);
795                 workset_t *pred_end_workset = get_block_info(pred)->end_workset;
796                 ir_node   *node;
797
798                 DB((dbg, DBG_FIX, "  Pred %+F\n", pred));
799
800                 /* spill all values not used anymore */
801                 workset_foreach(pred_end_workset, node, iter) {
802                         ir_node *n2;
803                         int      iter2;
804                         int      found = 0;
805                         workset_foreach(start_workset, n2, iter2) {
806                                 if(n2 == node) {
807                                         found = 1;
808                                         break;
809                                 }
810                                 /* note that we do not look at phi inputs, becuase the values
811                                  * will be either live-end and need no spill or
812                                  * they have other users in which must be somewhere else in the
813                                  * workset */
814                         }
815
816 #if 0
817                         if(!found && be_is_live_out(lv, pred, node)
818                                         && !pred_end_workset->vals[iter].reloaded) {
819                                 ir_node *insert_point
820                                         = be_get_end_of_block_insertion_point(pred);
821                                 DB((dbg, DBG_SPILL, "Spill %+F before %+F\n", node,
822                                      insert_point));
823                                 be_add_spill(senv, node, insert_point);
824                         }
825 #endif
826                 }
827
828                 /* reload missing values in predecessors */
829                 workset_foreach(start_workset, node, iter) {
830                         /* if node is a phi of the current block we reload
831                          * the corresponding argument, else node itself */
832                         if(is_Phi(node) && block == get_nodes_block(node)) {
833                                 node = get_irn_n(node, i);
834
835                                 /* we might have unknowns as argument for the phi */
836                                 if(!arch_irn_consider_in_reg_alloc(arch_env, cls, node))
837                                         continue;
838                         }
839
840                         /* check if node is in a register at end of pred */
841                         if(workset_contains(pred_end_workset, node))
842                                 continue;
843
844                         /* node is not in memory at the end of pred -> reload it */
845                         DB((dbg, DBG_FIX, "    reload %+F\n", node));
846                         DB((dbg, DBG_SPILL, "Reload %+F before %+F,%d\n", node, block, i));
847                         be_add_reload_on_edge(senv, node, block, i, cls, 1);
848                 }
849         }
850 }
851
852 static void be_spill_belady(be_irg_t *birg, const arch_register_class_t *rcls)
853 {
854         ir_graph *irg = be_get_birg_irg(birg);
855
856         be_liveness_assure_sets(be_assure_liveness(birg));
857
858         /* construct control flow loop tree */
859         if(! (get_irg_loopinfo_state(irg) & loopinfo_cf_consistent)) {
860                 construct_cf_backedges(irg);
861         }
862
863         be_clear_links(irg);
864
865         /* init belady env */
866         obstack_init(&obst);
867         arch_env = birg->main_env->arch_env;
868         cls      = rcls;
869         lv       = be_get_birg_liveness(birg);
870         n_regs   = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
871         ws       = new_workset();
872         uses     = be_begin_uses(irg, lv);
873         loop_ana = be_new_loop_pressure(birg);
874         senv     = be_new_spill_env(birg);
875         worklist = new_pdeq();
876
877         pdeq_putr(worklist, get_irg_start_block(irg));
878
879         while(!pdeq_empty(worklist)) {
880                 ir_node *block = pdeq_getl(worklist);
881                 belady(block);
882         }
883         /* end block might not be reachable in endless loops */
884         belady(get_irg_end_block(irg));
885
886         del_pdeq(worklist);
887
888         /* belady was block-local, fix the global flow by adding reloads on the
889          * edges */
890         irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);
891
892         /* Insert spill/reload nodes into the graph and fix usages */
893         be_insert_spills_reloads(senv);
894
895         /* clean up */
896         be_delete_spill_env(senv);
897         be_end_uses(uses);
898         be_free_loop_pressure(loop_ana);
899         obstack_free(&obst, NULL);
900 }
901
902 void be_init_spillbelady(void)
903 {
904         static be_spiller_t belady_spiller = {
905                 be_spill_belady
906         };
907
908         be_register_spiller("belady", &belady_spiller);
909         FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady");
910 }
911
912 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady);