again
[libfirm] / ir / be / bespillbelady.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Belady's spill algorithm.
23  * @author      Daniel Grund, Matthias Braun
24  * @date        20.09.2005
25  * @version     $Id$
26  */
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30
31 #include <stdbool.h>
32
33 #include "obst.h"
34 #include "irprintf_t.h"
35 #include "irgraph.h"
36 #include "irnode.h"
37 #include "irmode.h"
38 #include "irgwalk.h"
39 #include "irloop.h"
40 #include "iredges_t.h"
41 #include "ircons_t.h"
42 #include "irprintf.h"
43 #include "irnodeset.h"
44
45 #include "beutil.h"
46 #include "bearch_t.h"
47 #include "beuses.h"
48 #include "besched_t.h"
49 #include "beirgmod.h"
50 #include "belive_t.h"
51 #include "benode_t.h"
52 #include "bechordal_t.h"
53 #include "bespilloptions.h"
54 #include "beloopana.h"
55 #include "beirg_t.h"
56 #include "bespill.h"
57 #include "bemodule.h"
58
/* Bit flags selecting a debug output category; they are passed as the mask
 * argument of the DB(()) calls in this file and can be or-ed together. */
#define DBG_SPILL     1
#define DBG_WSETS     2
#define DBG_FIX       4
#define DBG_DECIDE    8
#define DBG_START    16
#define DBG_SLOTS    32
#define DBG_TRACE    64
#define DBG_WORKSET 128
/* debug module handle used by all DB(()) calls below */
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

/* placeholder stored in loc_t::time by workset_insert() until a real next-use
 * distance has been computed for the entry */
#define TIME_UNDEFINED 6666

//#define LOOK_AT_LOOPDEPTH
72
/**
 * An association between a node and a point in time.
 * This is the element type of a workset: one entry per value that is
 * currently assumed to be in a register.
 */
typedef struct loc_t {
        ir_node          *node;     /**< the value itself */
        unsigned          time;     /**< A use time (see beuses.h). */
        bool              spilled;  /**< the value was already spilled on this path */
} loc_t;
81
82 typedef struct _workset_t {
83         int   len;          /**< current length */
84         loc_t vals[0];      /**< inlined array of the values/distances in this working set */
85 } workset_t;
86
/* obstack from which worksets and block infos are allocated */
static struct obstack               obst;
/* architecture environment handed to the arch_irn_* queries */
static const arch_env_t            *arch_env;
/* register class the spiller is currently working on */
static const arch_register_class_t *cls;
/* liveness information (used to enumerate the live-ins of a block) */
static const be_lv_t               *lv;
/* loop analysis queried for the register pressure of a loop */
static be_loopana_t                *loop_ana;
/* capacity of every workset (presumably the number of usable registers in
 * cls -- it is set outside of this part of the file) */
static int                          n_regs;
static workset_t                   *ws;     /**< the main workset used while
                                                 processing a block. */
static be_uses_t                   *uses;   /**< env for the next-use magic */
static ir_node                     *instr;  /**< current instruction */
static unsigned                     instr_nr; /**< current instruction number
                                                   (relative to block start) */
static spill_env_t                 *senv;   /**< see bespill.h */
/* NOTE(review): not referenced in this part of the file */
static ir_node                    **blocklist;

/* option: let displace() place spills itself, directly before the current
 * instruction */
static bool                         move_spills      = true;
/* option: delay values whose next use lies outside the current loop when
 * computing a start workset */
static bool                         respectloopdepth = true;
/* option: use the end worksets of already processed predecessors when
 * computing a start workset */
static bool                         improve_known_preds = true;
/* factor to weight the different costs of reloading/rematerializing a node
   (see bespill.h be_get_reload_costs_no_weight) */
static int                          remat_bonus      = 10;
108
109 static const lc_opt_table_entry_t options[] = {
110         LC_OPT_ENT_BOOL   ("movespills", "try to move spills out of loops", &move_spills),
111         LC_OPT_ENT_BOOL   ("respectloopdepth", "exprimental (outermost loop cutting)", &respectloopdepth),
112         LC_OPT_ENT_BOOL   ("improveknownpreds", "experimental (known preds cutting)", &improve_known_preds),
113         LC_OPT_ENT_INT    ("rematbonus", "give bonus to rematerialisable nodes", &remat_bonus),
114         LC_OPT_LAST
115 };
116
117 static int loc_compare(const void *a, const void *b)
118 {
119         const loc_t *p = a;
120         const loc_t *q = b;
121         return p->time - q->time;
122 }
123
124 void workset_print(const workset_t *w)
125 {
126         int i;
127
128         for(i = 0; i < w->len; ++i) {
129                 ir_fprintf(stderr, "%+F %d\n", w->vals[i].node, w->vals[i].time);
130         }
131 }
132
133 /**
134  * Alloc a new workset on obstack @p ob with maximum size @p max
135  */
136 static workset_t *new_workset(void)
137 {
138         workset_t *res;
139         size_t     size = sizeof(*res) + n_regs * sizeof(res->vals[0]);
140
141         res  = obstack_alloc(&obst, size);
142         memset(res, 0, size);
143         return res;
144 }
145
146 /**
147  * Alloc a new instance on obstack and make it equal to @param workset
148  */
149 static workset_t *workset_clone(workset_t *workset)
150 {
151         workset_t *res;
152         size_t size = sizeof(*res) + n_regs * sizeof(res->vals[0]);
153         res = obstack_alloc(&obst, size);
154         memcpy(res, workset, size);
155         return res;
156 }
157
158 /**
159  * Copy workset @param src to @param tgt
160  */
161 static void workset_copy(workset_t *dest, const workset_t *src)
162 {
163         size_t size = sizeof(*src) + n_regs * sizeof(src->vals[0]);
164         memcpy(dest, src, size);
165 }
166
167 /**
168  * Overwrites the current content array of @param ws with the
169  * @param count locations given at memory @param locs.
170  * Set the length of @param ws to count.
171  */
172 static void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs)
173 {
174         workset->len = count;
175         memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
176 }
177
178 /**
179  * Inserts the value @p val into the workset, iff it is not
180  * already contained. The workset must not be full.
181  */
182 static void workset_insert(workset_t *workset, ir_node *val, bool spilled)
183 {
184         loc_t *loc;
185         int    i;
186         /* check for current regclass */
187         assert(arch_irn_consider_in_reg_alloc(arch_env, cls, val));
188
189         /* check if val is already contained */
190         for (i = 0; i < workset->len; ++i) {
191                 loc = &workset->vals[i];
192                 if (loc->node == val) {
193                         if (spilled) {
194                                 loc->spilled = true;
195                         }
196                         return;
197                 }
198         }
199
200         /* insert val */
201         assert(workset->len < n_regs && "Workset already full!");
202         loc           = &workset->vals[workset->len];
203         loc->node     = val;
204         loc->spilled  = spilled;
205         loc->time     = TIME_UNDEFINED;
206         workset->len++;
207 }
208
209 /**
210  * Removes all entries from this workset
211  */
212 static void workset_clear(workset_t *workset)
213 {
214         workset->len = 0;
215 }
216
217 /**
218  * Removes the value @p val from the workset if present.
219  */
220 static INLINE void workset_remove(workset_t *workset, ir_node *val)
221 {
222         int i;
223         for(i = 0; i < workset->len; ++i) {
224                 if (workset->vals[i].node == val) {
225                         workset->vals[i] = workset->vals[--workset->len];
226                         return;
227                 }
228         }
229 }
230
231 static INLINE const loc_t *workset_contains(const workset_t *ws,
232                                             const ir_node *val)
233 {
234         int i;
235
236         for (i = 0; i < ws->len; ++i) {
237                 if (ws->vals[i].node == val)
238                         return &ws->vals[i];
239         }
240
241         return NULL;
242 }
243
/**
 * Iterates over all values in the working set.
 * @p ws The workset to iterate (any pointer expression)
 * @p v  A variable to put the current value in; it is NULL once the loop
 *       has run to completion
 * @p i  An integer for internal use (index of the current value)
 */
#define workset_foreach(ws, v, i) \
        for ((i) = 0; \
             (v) = ((i) < (ws)->len) ? (ws)->vals[(i)].node : NULL, (i) < (ws)->len; \
             ++(i))

/** Sets the time of entry @p i of workset @p ws to @p t. */
#define workset_set_time(ws, i, t) ((ws)->vals[(i)].time = (t))
/** Yields the time of entry @p i of workset @p ws. */
#define workset_get_time(ws, i) ((ws)->vals[(i)].time)
/** Sets the number of valid entries of workset @p ws. */
#define workset_set_length(ws, length) ((ws)->len = (length))
/** Yields the number of valid entries of workset @p ws. */
#define workset_get_length(ws) ((ws)->len)
/** Yields the node of entry @p i of workset @p ws. */
#define workset_get_val(ws, i) ((ws)->vals[(i)].node)
/** Sorts the entries of workset @p ws by increasing time.
 *  NOTE: the expansion ends in a ';' -- kept as is, because uses outside of
 *  the reviewed part of the file may rely on it. */
#define workset_sort(ws) qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare);
260
/**
 * Per-block result of the spiller; attached to a block through its link
 * field (see get_block_info()/set_block_info()).
 */
typedef struct _block_info_t
{
        workset_t *start_workset;  /**< copy of the workset at the start of the block */
        workset_t *end_workset;    /**< copy of the workset at the end of the block */
} block_info_t;
266
267
268 static void *new_block_info(void)
269 {
270         block_info_t *res = obstack_alloc(&obst, sizeof(res[0]));
271         memset(res, 0, sizeof(res[0]));
272
273         return res;
274 }
275
/* The block info is kept in the link field of the block node; a block whose
 * link is NULL is treated as "not processed yet" by the code below. */
#define get_block_info(block)        ((block_info_t *)get_irn_link(block))
#define set_block_info(block, info)  set_irn_link(block, info)
278
279 /**
280  * @return The distance to the next use or 0 if irn has dont_spill flag set
281  */
282 static INLINE unsigned get_distance(ir_node *from, unsigned from_step,
283                                     const ir_node *def, int skip_from_uses)
284 {
285         be_next_use_t use;
286         int           flags = arch_irn_get_flags(arch_env, def);
287         unsigned      costs;
288         unsigned      time;
289
290         assert(! (flags & arch_irn_flags_ignore));
291
292         use  = be_get_next_use(uses, from, from_step, def, skip_from_uses);
293         time = use.time;
294         if (USES_IS_INFINITE(time))
295                 return USES_INFINITY;
296
297         /* We have to keep nonspillable nodes in the workingset */
298         if (flags & arch_irn_flags_dont_spill)
299                 return 0;
300
301         /* give some bonus to rematerialisable nodes */
302         if (remat_bonus > 0) {
303                 costs = be_get_reload_costs_no_weight(senv, def, use.before);
304                 assert(costs * remat_bonus < 1000);
305                 time  += 1000 - (costs * remat_bonus);
306         }
307
308         return time;
309 }
310
311 /**
312  * Performs the actions necessary to grant the request that:
313  * - new_vals can be held in registers
314  * - as few as possible other values are disposed
315  * - the worst values get disposed
316  *
317  * @p is_usage indicates that the values in new_vals are used (not defined)
318  * In this case reloads must be performed
319  */
320 static void displace(workset_t *new_vals, int is_usage)
321 {
322         ir_node **to_insert = alloca(n_regs * sizeof(to_insert[0]));
323         bool     *spilled   = alloca(n_regs * sizeof(spilled[0]));
324         ir_node  *val;
325         int       i;
326         int       len;
327         int       spills_needed;
328         int       demand;
329         int       iter;
330
331         /* 1. Identify the number of needed slots and the values to reload */
332         demand = 0;
333         workset_foreach(new_vals, val, iter) {
334                 bool reloaded = false;
335
336                 if (! workset_contains(ws, val)) {
337                         DB((dbg, DBG_DECIDE, "    insert %+F\n", val));
338                         if (is_usage) {
339                                 DB((dbg, DBG_SPILL, "Reload %+F before %+F\n", val, instr));
340                                 be_add_reload(senv, val, instr, cls, 1);
341                                 reloaded = true;
342                         }
343                 } else {
344                         DB((dbg, DBG_DECIDE, "    %+F already in workset\n", val));
345                         assert(is_usage);
346                         /* remove the value from the current workset so it is not accidently
347                          * spilled */
348                         workset_remove(ws, val);
349                 }
350                 spilled[demand]   = reloaded;
351                 to_insert[demand] = val;
352                 ++demand;
353         }
354
355         /* 2. Make room for at least 'demand' slots */
356         len           = workset_get_length(ws);
357         spills_needed = len + demand - n_regs;
358         assert(spills_needed <= len);
359
360         /* Only make more free room if we do not have enough */
361         if (spills_needed > 0) {
362                 ir_node   *curr_bb  = NULL;
363                 workset_t *ws_start = NULL;
364
365                 if (move_spills) {
366                         curr_bb  = get_nodes_block(instr);
367                         ws_start = get_block_info(curr_bb)->start_workset;
368                 }
369
370                 DB((dbg, DBG_DECIDE, "    disposing %d values\n", spills_needed));
371
372                 /* calculate current next-use distance for live values */
373                 for (i = 0; i < len; ++i) {
374                         ir_node  *val  = workset_get_val(ws, i);
375                         unsigned  dist = get_distance(instr, instr_nr, val, !is_usage);
376                         workset_set_time(ws, i, dist);
377                 }
378
379                 /* sort entries by increasing nextuse-distance*/
380                 workset_sort(ws);
381
382                 for (i = len - spills_needed; i < len; ++i) {
383                         ir_node *val = ws->vals[i].node;
384
385                         DB((dbg, DBG_DECIDE, "    disposing node %+F (%u)\n", val,
386                              workset_get_time(ws, i)));
387
388                         if (move_spills) {
389                                 if (!USES_IS_INFINITE(ws->vals[i].time)
390                                                 && !ws->vals[i].spilled) {
391                                         ir_node *after_pos = sched_prev(instr);
392                                         DB((dbg, DBG_DECIDE, "Spill %+F after node %+F\n", val,
393                                                 after_pos));
394                                         be_add_spill(senv, val, after_pos);
395                                 }
396                         }
397                 }
398
399                 /* kill the last 'demand' entries in the array */
400                 workset_set_length(ws, len - spills_needed);
401         }
402
403         /* 3. Insert the new values into the workset */
404         for (i = 0; i < demand; ++i) {
405                 ir_node *val = to_insert[i];
406
407                 workset_insert(ws, val, spilled[i]);
408         }
409 }
410
/** How a value is represented in the end worksets of the predecessor blocks
 *  (see available_in_all_preds()). */
enum {
        AVAILABLE_EVERYWHERE,  /* contained in the end workset of every predecessor */
        AVAILABLE_NOWHERE,     /* contained in no predecessor end workset */
        AVAILABLE_PARTLY,      /* contained in some, but not all of them */
        AVAILABLE_UNKNOWN      /* not determined (used when not all predecessors
                                  have been processed yet) */
};
417
418 static unsigned available_in_all_preds(workset_t* const* pred_worksets,
419                                        size_t n_pred_worksets,
420                                        const ir_node *value, bool is_local_phi)
421 {
422         size_t i;
423         bool   avail_everywhere = true;
424         bool   avail_nowhere    = true;
425
426         assert(n_pred_worksets > 0);
427
428         /* value available in all preds? */
429         for (i = 0; i < n_pred_worksets; ++i) {
430                 bool             found     = false;
431                 const workset_t *p_workset = pred_worksets[i];
432                 int              p_len     = workset_get_length(p_workset);
433                 int              p_i;
434                 const ir_node   *l_value;
435
436                 if (is_local_phi) {
437                         assert(is_Phi(value));
438                         l_value = get_irn_n(value, i);
439                 } else {
440                         l_value = value;
441                 }
442
443                 for (p_i = 0; p_i < p_len; ++p_i) {
444                         const loc_t *p_l = &p_workset->vals[p_i];
445                         if (p_l->node != l_value)
446                                 continue;
447
448                         found = true;
449                         break;
450                 }
451
452                 if (found) {
453                         avail_nowhere = false;
454                 } else {
455                         avail_everywhere = false;
456                 }
457         }
458
459         if (avail_everywhere) {
460                 assert(!avail_nowhere);
461                 return AVAILABLE_EVERYWHERE;
462         } else if (avail_nowhere) {
463                 return AVAILABLE_NOWHERE;
464         } else {
465                 return AVAILABLE_PARTLY;
466         }
467 }
468
469 /** Decides whether a specific node should be in the start workset or not
470  *
471  * @param env      belady environment
472  * @param first
473  * @param node     the node to test
474  * @param loop     the loop of the node
475  */
476 static loc_t to_take_or_not_to_take(ir_node* first, ir_node *node,
477                                     ir_loop *loop, unsigned available)
478 {
479         be_next_use_t next_use;
480         loc_t         loc;
481
482         loc.time    = USES_INFINITY;
483         loc.node    = node;
484         loc.spilled = false;
485
486         if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node)) {
487                 loc.time = USES_INFINITY;
488                 return loc;
489         }
490
491         /* We have to keep nonspillable nodes in the workingset */
492         if (arch_irn_get_flags(arch_env, node) & arch_irn_flags_dont_spill) {
493                 loc.time = 0;
494                 DB((dbg, DBG_START, "    %+F taken (dontspill node)\n", node, loc.time));
495                 return loc;
496         }
497
498         next_use = be_get_next_use(uses, first, 0, node, 0);
499         if (USES_IS_INFINITE(next_use.time)) {
500                 // the nodes marked as live in shouldn't be dead, so it must be a phi
501                 assert(is_Phi(node));
502                 loc.time = USES_INFINITY;
503                 DB((dbg, DBG_START, "    %+F not taken (dead)\n", node));
504                 return loc;
505         }
506
507         loc.time = next_use.time;
508
509         if (improve_known_preds) {
510                 if (available == AVAILABLE_EVERYWHERE) {
511                         DB((dbg, DBG_START, "    %+F taken (%u, live in all preds)\n",
512                             node, loc.time));
513                         return loc;
514                 } else if(available == AVAILABLE_NOWHERE) {
515                         DB((dbg, DBG_START, "    %+F not taken (%u, live in no pred)\n",
516                             node, loc.time));
517                         loc.time = USES_INFINITY;
518                         return loc;
519                 }
520         }
521
522         if (!respectloopdepth || next_use.outermost_loop >= get_loop_depth(loop)) {
523                 DB((dbg, DBG_START, "    %+F taken (%u, loop %d)\n", node, loc.time,
524                     next_use.outermost_loop));
525         } else {
526                 loc.time = USES_PENDING;
527                 DB((dbg, DBG_START, "    %+F delayed (outerdepth %d < loopdepth %d)\n",
528                     node, next_use.outermost_loop, get_loop_depth(loop)));
529         }
530
531         return loc;
532 }
533
/**
 * Computes the start-workset for a block with multiple predecessors. We assume
 * that at least 1 of the predecessors is a back-edge which means we're at the
 * beginning of a loop. We try to reload as much values as possible now so they
 * don't get reloaded inside the loop.
 *
 * The result is written into the global workset ws. Phis of @p block that do
 * not make it into the start workset are handed to be_spill_phi().
 *
 * @param block  the block to compute the start workset for
 */
static void decide_start_workset(const ir_node *block)
{
        ir_loop    *loop = get_irn_loop(block);
        ir_node    *first;
        ir_node    *node;
        loc_t       loc;
        loc_t      *starters;
        loc_t      *delayed;
        int         i, len, ws_count;
        int             free_slots, free_pressure_slots;
        unsigned    pressure;
        int         arity;
        workset_t **pred_worksets;
        bool        all_preds_known;

        /* check predecessors: collect their end worksets; a predecessor without
         * block info has not been processed yet and gets a NULL entry */
        arity           = get_irn_arity(block);
        pred_worksets   = alloca(sizeof(pred_worksets[0]) * arity);
        all_preds_known = true;
        for(i = 0; i < arity; ++i) {
                ir_node      *pred_block = get_Block_cfgpred_block(block, i);
                block_info_t *pred_info  = get_block_info(pred_block);

                if (pred_info == NULL) {
                        pred_worksets[i] = NULL;
                        all_preds_known  = false;
                } else {
                        pred_worksets[i] = pred_info->end_workset;
                }
        }

        /* Collect all values living at start of block:
         * starters = candidates for the start workset,
         * delayed  = values that are only taken if there is room left */
        starters = NEW_ARR_F(loc_t, 0);
        delayed  = NEW_ARR_F(loc_t, 0);

        DB((dbg, DBG_START, "Living at start of %+F:\n", block));
        first = sched_first(block);

        /* check all Phis first (the loop stops at the first non-Phi in the
         * schedule) */
        sched_foreach(block, node) {
                unsigned available;

                if (! is_Phi(node))
                        break;
                if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node))
                        continue;

                /* the predecessor worksets can only be consulted if none of
                 * them is missing */
                if (all_preds_known) {
                        available = available_in_all_preds(pred_worksets, arity, node, true);
                } else {
                        available = AVAILABLE_UNKNOWN;
                }

                loc = to_take_or_not_to_take(first, node, loop, available);

                if (! USES_IS_INFINITE(loc.time)) {
                        if (USES_IS_PENDING(loc.time) && !all_preds_known)
                                ARR_APP1(loc_t, delayed, loc);
                        else
                                ARR_APP1(loc_t, starters, loc);
                } else {
                        /* a Phi that is not taken is spilled as a whole */
                        be_spill_phi(senv, node);
                }
        }

        /* check all Live-Ins */
        be_lv_foreach(lv, block, be_lv_state_in, i) {
                ir_node *node = be_lv_get_irn(lv, block, i);
                unsigned available;

                if (all_preds_known) {
                        available = available_in_all_preds(pred_worksets, arity, node, false);
                } else {
                        available = AVAILABLE_UNKNOWN;
                }

                loc = to_take_or_not_to_take(first, node, loop, available);

                if (! USES_IS_INFINITE(loc.time)) {
                        if (USES_IS_PENDING(loc.time) && !all_preds_known)
                                ARR_APP1(loc_t, delayed, loc);
                        else
                                ARR_APP1(loc_t, starters, loc);
                }
        }

        /* the number of delayed values that may be taken is limited by the
         * free room in the start workset and by the room the loop pressure
         * leaves (the pressure figure is expected to include the delayed
         * values, see the assertion) */
        pressure            = be_get_loop_pressure(loop_ana, cls, loop);
        assert(ARR_LEN(delayed) <= (signed)pressure);
        free_slots          = n_regs - ARR_LEN(starters);
        free_pressure_slots = n_regs - (pressure - ARR_LEN(delayed));
        free_slots          = MIN(free_slots, free_pressure_slots);

        /* so far we only put nodes into the starters list that are used inside
         * the loop. If register pressure in the loop is low then we can take some
         * values and let them live through the loop */
        DB((dbg, DBG_START, "Loop pressure %d, taking %d delayed vals\n",
            pressure, free_slots));
        if (free_slots > 0) {
                qsort(delayed, ARR_LEN(delayed), sizeof(delayed[0]), loc_compare);

                for (i = 0; i < ARR_LEN(delayed) && free_slots > 0; ++i) {
                        int    p, arity;
                        loc_t *loc = & delayed[i];

                        if (!is_Phi(loc->node)) {
                                /* don't use values which are dead in a known predecessors
                                 * to not induce unnecessary reloads */
                                arity = get_irn_arity(block);
                                for (p = 0; p < arity; ++p) {
                                        ir_node      *pred_block = get_Block_cfgpred_block(block, p);
                                        block_info_t *pred_info  = get_block_info(pred_block);

                                        /* unprocessed predecessor: nothing known */
                                        if (pred_info == NULL)
                                                continue;

                                        if (!workset_contains(pred_info->end_workset, loc->node)) {
                                                DB((dbg, DBG_START,
                                                        "    delayed %+F not live at pred %+F\n", loc->node,
                                                        pred_block));
                                                goto skip_delayed;
                                        }
                                }
                        }

                        DB((dbg, DBG_START, "    delayed %+F taken\n", loc->node));
                        ARR_APP1(loc_t, starters, *loc);
                        /* mark the entry as taken for the phi spilling below */
                        loc->node = NULL;
                        --free_slots;
                skip_delayed:
                        ;
                }
        }

        /* spill phis (the actual phis not just their values) that are in this block
         * but were delayed and not taken */
        for (i = ARR_LEN(delayed) - 1; i >= 0; --i) {
                ir_node *node = delayed[i].node;
                if (node == NULL || !is_Phi(node) || get_nodes_block(node) != block)
                        continue;

                DB((dbg, DBG_START, "    spilling delayed phi %+F\n", node));
                be_spill_phi(senv, node);
        }
        DEL_ARR_F(delayed);

        /* Sort start values by first use */
        qsort(starters, ARR_LEN(starters), sizeof(starters[0]), loc_compare);

        /* Copy the best ones from starters to start workset */
        ws_count = MIN(ARR_LEN(starters), n_regs);
        workset_clear(ws);
        workset_bulk_fill(ws, ws_count, starters);

        /* spill phis (the actual phis not just their values) that are in this block
         * but not in the start workset */
        len = ARR_LEN(starters);
        for (i = ws_count; i < len; ++i) {
                ir_node *node = starters[i].node;
                if (! is_Phi(node) || get_nodes_block(node) != block)
                        continue;

                DB((dbg, DBG_START, "    spilling phi %+F\n", node));
                be_spill_phi(senv, node);
        }

        DEL_ARR_F(starters);

        /* determine spill status of the values: If there's 1 pred block (which
         * is no backedge) where the value is spilled then we must set it to
         * spilled here. */
        for(i = 0; i < ws_count; ++i) {
                loc_t   *loc     = &ws->vals[i];
                ir_node *value   = loc->node;
                bool     spilled;
                int      n;

                /* phis from this block aren't spilled */
                if (get_nodes_block(value) == block) {
                        assert(is_Phi(value));
                        loc->spilled = false;
                        continue;
                }

                /* determine if value was spilled on any predecessor */
                spilled = false;
                for(n = 0; n < arity; ++n) {
                        workset_t *pred_workset = pred_worksets[n];
                        int        p_len;
                        int        p;

                        /* predecessor not processed yet */
                        if (pred_workset == NULL)
                                continue;

                        p_len = workset_get_length(pred_workset);
                        for(p = 0; p < p_len; ++p) {
                                loc_t *l = &pred_workset->vals[p];

                                if (l->node != value)
                                        continue;

                                if (l->spilled) {
                                        spilled = true;
                                }
                                break;
                        }
                }

                loc->spilled = spilled;
        }
}
750
751 /**
752  * For the given block @p block, decide for each values
753  * whether it is used from a register or is reloaded
754  * before the use.
755  */
756 static void process_block(ir_node *block)
757 {
758         workset_t       *new_vals;
759         ir_node         *irn;
760         int              iter;
761         block_info_t    *block_info;
762         int              arity;
763
764         /* no need to process a block twice */
765         assert(get_block_info(block) == NULL);
766
767         /* construct start workset */
768         arity = get_Block_n_cfgpreds(block);
769         if (arity == 0) {
770                 /* no predecessor -> empty set */
771                 workset_clear(ws);
772         } else if (arity == 1) {
773                 /* one predecessor, copy it's end workset */
774                 ir_node      *pred_block = get_Block_cfgpred_block(block, 0);
775                 block_info_t *pred_info  = get_block_info(pred_block);
776
777                 assert(pred_info != NULL);
778                 workset_copy(ws, pred_info->end_workset);
779         } else {
780                 /* multiple predecessors, do more advanced magic :) */
781                 decide_start_workset(block);
782         }
783
784         DB((dbg, DBG_DECIDE, "\n"));
785         DB((dbg, DBG_DECIDE, "Decide for %+F\n", block));
786
787         block_info = new_block_info();
788         set_block_info(block, block_info);
789
790         DB((dbg, DBG_WSETS, "Start workset for %+F:\n", block));
791         workset_foreach(ws, irn, iter) {
792                 DB((dbg, DBG_WSETS, "  %+F (%u)\n", irn,
793                      workset_get_time(ws, iter)));
794         }
795
796         block_info->start_workset = workset_clone(ws);
797
798         /* process the block from start to end */
799         DB((dbg, DBG_WSETS, "Processing...\n"));
800         instr_nr = 0;
801         /* TODO: this leaks (into the obstack)... */
802         new_vals = new_workset();
803
804         sched_foreach(block, irn) {
805                 int i, arity;
806                 assert(workset_get_length(ws) <= n_regs);
807
808                 /* Phis are no real instr (see insert_starters()) */
809                 if (is_Phi(irn)) {
810                         continue;
811                 }
812                 DB((dbg, DBG_DECIDE, "  ...%+F\n", irn));
813
814                 /* set instruction in the workset */
815                 instr = irn;
816
817                 /* allocate all values _used_ by this instruction */
818                 workset_clear(new_vals);
819                 for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
820                         ir_node *in = get_irn_n(irn, i);
821                         if (!arch_irn_consider_in_reg_alloc(arch_env, cls, in))
822                                 continue;
823
824                         /* (note that "spilled" is irrelevant here) */
825                         workset_insert(new_vals, in, false);
826                 }
827                 displace(new_vals, 1);
828
829                 /* allocate all values _defined_ by this instruction */
830                 workset_clear(new_vals);
831                 if (get_irn_mode(irn) == mode_T) {
832                         const ir_edge_t *edge;
833
834                         foreach_out_edge(irn, edge) {
835                                 ir_node *proj = get_edge_src_irn(edge);
836                                 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, proj))
837                                         continue;
838                                 workset_insert(new_vals, proj, false);
839                         }
840                 } else {
841                         if (!arch_irn_consider_in_reg_alloc(arch_env, cls, irn))
842                                 continue;
843                         workset_insert(new_vals, irn, false);
844                 }
845                 displace(new_vals, 0);
846
847                 instr_nr++;
848         }
849
850         /* Remember end-workset for this block */
851         block_info->end_workset = workset_clone(ws);
852         DB((dbg, DBG_WSETS, "End workset for %+F:\n", block));
853         workset_foreach(ws, irn, iter)
854                 DB((dbg, DBG_WSETS, "  %+F (%u)\n", irn,
855                      workset_get_time(ws, iter)));
856 }
857
858 /**
859  * 'decide' is block-local and makes assumptions
860  * about the set of live-ins. Thus we must adapt the
861  * live-outs to the live-ins at each block-border.
862  */
863 static void fix_block_borders(ir_node *block, void *data)
864 {
865         workset_t    *start_workset;
866         int           arity;
867         int           i;
868         int           iter;
869         (void) data;
870
871         DB((dbg, DBG_FIX, "\n"));
872         DB((dbg, DBG_FIX, "Fixing %+F\n", block));
873
874         arity = get_irn_arity(block);
875         /* can happen for endless loops */
876         if (arity == 0)
877                 return;
878
879         start_workset = get_block_info(block)->start_workset;
880
881         /* process all pred blocks */
882         for (i = 0; i < arity; ++i) {
883                 ir_node   *pred = get_Block_cfgpred_block(block, i);
884                 workset_t *pred_end_workset = get_block_info(pred)->end_workset;
885                 ir_node   *node;
886
887                 DB((dbg, DBG_FIX, "  Pred %+F\n", pred));
888
889                 /* spill all values not used anymore */
890                 workset_foreach(pred_end_workset, node, iter) {
891                         ir_node *n2;
892                         int      iter2;
893                         bool     found = false;
894                         workset_foreach(start_workset, n2, iter2) {
895                                 if (n2 == node) {
896                                         found = true;
897                                         break;
898                                 }
899                                 /* note that we do not look at phi inputs, becuase the values
900                                  * will be either live-end and need no spill or
901                                  * they have other users in which must be somewhere else in the
902                                  * workset */
903                         }
904
905                         if (found)
906                                 continue;
907
908                         if (move_spills && be_is_live_in(lv, block, node)
909                                         && !pred_end_workset->vals[iter].spilled) {
910                                 ir_node *insert_point;
911                                 if (arity > 1) {
912                                         insert_point = be_get_end_of_block_insertion_point(pred);
913                                         insert_point = sched_prev(insert_point);
914                                 } else {
915                                         insert_point = block;
916                                 }
917                                 DB((dbg, DBG_SPILL, "Spill %+F after %+F\n", node,
918                                      insert_point));
919                                 be_add_spill(senv, node, insert_point);
920                         }
921                 }
922
923                 /* reload missing values in predecessors, add missing spills */
924                 workset_foreach(start_workset, node, iter) {
925                         const loc_t *l    = &start_workset->vals[iter];
926                         const loc_t *pred_loc;
927
928                         /* if node is a phi of the current block we reload
929                          * the corresponding argument, else node itself */
930                         if (is_Phi(node) && get_nodes_block(node) == block) {
931                                 node = get_irn_n(node, i);
932                                 assert(!l->spilled);
933
934                                 /* we might have unknowns as argument for the phi */
935                                 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node))
936                                         continue;
937                         }
938
939                         /* check if node is in a register at end of pred */
940                         pred_loc = workset_contains(pred_end_workset, node);
941                         if (pred_loc != NULL) {
942                                 /* we might have to spill value on this path */
943                                 if (move_spills && !pred_loc->spilled && l->spilled) {
944                                         ir_node *insert_point
945                                                 = be_get_end_of_block_insertion_point(pred);
946                                         insert_point = sched_prev(insert_point);
947                                         DB((dbg, DBG_SPILL, "Spill %+F after %+F\n", node,
948                                             insert_point));
949                                         be_add_spill(senv, node, insert_point);
950                                 }
951                         } else {
952                                 /* node is not in register at the end of pred -> reload it */
953                                 DB((dbg, DBG_FIX, "    reload %+F\n", node));
954                                 DB((dbg, DBG_SPILL, "Reload %+F before %+F,%d\n", node, block, i));
955                                 be_add_reload_on_edge(senv, node, block, i, cls, 1);
956                         }
957                 }
958         }
959 }
960
961 static void add_block(ir_node *block, void *data)
962 {
963         (void) data;
964         ARR_APP1(ir_node*, blocklist, block);
965 }
966
967 static void be_spill_belady(be_irg_t *birg, const arch_register_class_t *rcls)
968 {
969         int i;
970         ir_graph *irg = be_get_birg_irg(birg);
971
972         be_liveness_assure_sets(be_assure_liveness(birg));
973
974         stat_ev_tim_push();
975         /* construct control flow loop tree */
976         if (! (get_irg_loopinfo_state(irg) & loopinfo_cf_consistent)) {
977                 construct_cf_backedges(irg);
978         }
979         stat_ev_tim_pop("belady_time_backedges");
980
981         stat_ev_tim_push();
982         be_clear_links(irg);
983         stat_ev_tim_pop("belady_time_clear_links");
984
985         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
986
987         /* init belady env */
988         stat_ev_tim_push();
989         obstack_init(&obst);
990         arch_env  = birg->main_env->arch_env;
991         cls       = rcls;
992         lv        = be_get_birg_liveness(birg);
993         n_regs    = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
994         ws        = new_workset();
995         uses      = be_begin_uses(irg, lv);
996         loop_ana  = be_new_loop_pressure(birg, cls);
997         senv      = be_new_spill_env(birg);
998         blocklist = NEW_ARR_F(ir_node*, 0);
999         irg_block_edges_walk(get_irg_start_block(irg), NULL, add_block, NULL);
1000         stat_ev_tim_pop("belady_time_init");
1001
1002         stat_ev_tim_push();
1003         /* walk blocks in reverse postorder */
1004         for (i = ARR_LEN(blocklist) - 1; i >= 0; --i) {
1005                 process_block(blocklist[i]);
1006         }
1007         DEL_ARR_F(blocklist);
1008         stat_ev_tim_pop("belady_time_belady");
1009
1010         stat_ev_tim_push();
1011         /* belady was block-local, fix the global flow by adding reloads on the
1012          * edges */
1013         irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);
1014         stat_ev_tim_pop("belady_time_fix_borders");
1015
1016         ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1017
1018         /* Insert spill/reload nodes into the graph and fix usages */
1019         be_insert_spills_reloads(senv);
1020
1021         /* clean up */
1022         be_delete_spill_env(senv);
1023         be_end_uses(uses);
1024         be_free_loop_pressure(loop_ana);
1025         obstack_free(&obst, NULL);
1026 }
1027
1028 void be_init_spillbelady(void)
1029 {
1030         static be_spiller_t belady_spiller = {
1031                 be_spill_belady
1032         };
1033         lc_opt_entry_t *be_grp       = lc_opt_get_grp(firm_opt_get_root(), "be");
1034         lc_opt_entry_t *belady_group = lc_opt_get_grp(be_grp, "belady");
1035         lc_opt_add_table(belady_group, options);
1036
1037         be_register_spiller("belady", &belady_spiller);
1038         FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady");
1039 }
1040
1041 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady);