- split array.h into array.h/array_t.h and make array.h independent of
[libfirm] / ir / be / bespillbelady.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Belady's spill algorithm.
23  * @author      Daniel Grund, Matthias Braun
24  * @date        20.09.2005
25  * @version     $Id$
26  */
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30
31 #include <stdbool.h>
32
33 #include "obst.h"
34 #include "irprintf_t.h"
35 #include "irgraph.h"
36 #include "irnode.h"
37 #include "irmode.h"
38 #include "irgwalk.h"
39 #include "irloop.h"
40 #include "iredges_t.h"
41 #include "ircons_t.h"
42 #include "irprintf.h"
43 #include "irnodeset.h"
44 #include "xmalloc.h"
45 #include "pdeq.h"
46
47 #include "beutil.h"
48 #include "bearch_t.h"
49 #include "beuses.h"
50 #include "besched_t.h"
51 #include "beirgmod.h"
52 #include "belive_t.h"
53 #include "benode_t.h"
54 #include "bechordal_t.h"
55 #include "bespilloptions.h"
56 #include "beloopana.h"
57 #include "beirg_t.h"
58 #include "bespill.h"
59 #include "bemodule.h"
60
/* Debug mask bits passed as second argument to the DB(()) statements below.
 * Each mask occupies its own bit so several of them can be combined. */
#define DBG_SPILL    (1 << 0)
#define DBG_WSETS    (1 << 1)
#define DBG_FIX      (1 << 2)
#define DBG_DECIDE   (1 << 3)
#define DBG_START    (1 << 4)
#define DBG_SLOTS    (1 << 5)
#define DBG_TRACE    (1 << 6)
#define DBG_WORKSET  (1 << 7)
69 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
70
71 #define TIME_UNDEFINED 6666
72
73 //#define LOOK_AT_LOOPDEPTH
74
75 /**
76  * An association between a node and a point in time.
77  */
78 typedef struct loc_t {
79         ir_node          *node;
80         unsigned          time;     /**< A use time (see beuses.h). */
81         bool              spilled;  /**< the value was already spilled on this path */
82 } loc_t;
83
84 typedef struct _workset_t {
85         int   len;          /**< current length */
86         loc_t vals[0];      /**< inlined array of the values/distances in this working set */
87 } workset_t;
88
89 static struct obstack               obst;
90 static const arch_env_t            *arch_env;
91 static const arch_register_class_t *cls;
92 static const be_lv_t               *lv;
93 static be_loopana_t                *loop_ana;
94 static int                          n_regs;
95 static workset_t                   *ws;     /**< the main workset used while
96                                                      processing a block. */
97 static be_uses_t                   *uses;   /**< env for the next-use magic */
98 static ir_node                     *instr;  /**< current instruction */
99 static unsigned                     instr_nr; /**< current instruction number
100                                                        (relative to block start) */
101 static ir_nodeset_t                 used;
102 static spill_env_t                 *senv;   /**< see bespill.h */
103 static pdeq                        *worklist;
104
105 static bool                         move_spills      = true;
106 static bool                         respectloopdepth = true;
107 static bool                         improve_known_preds = true;
108 /* factor to weight the different costs of reloading/rematerializing a node
109    (see bespill.h be_get_reload_costs_no_weight) */
110 static int                          remat_bonus      = 10;
111
112 static const lc_opt_table_entry_t options[] = {
113         LC_OPT_ENT_BOOL   ("movespills", "try to move spills out of loops", &move_spills),
114         LC_OPT_ENT_BOOL   ("respectloopdepth", "exprimental (outermost loop cutting)", &respectloopdepth),
115         LC_OPT_ENT_BOOL   ("improveknownpreds", "experimental (known preds cutting)", &improve_known_preds),
116         LC_OPT_ENT_INT    ("rematbonus", "give bonus to rematerialisable nodes", &remat_bonus),
117         LC_OPT_LAST
118 };
119
120 static int loc_compare(const void *a, const void *b)
121 {
122         const loc_t *p = a;
123         const loc_t *q = b;
124         return p->time - q->time;
125 }
126
127 void workset_print(const workset_t *w)
128 {
129         int i;
130
131         for(i = 0; i < w->len; ++i) {
132                 ir_fprintf(stderr, "%+F %d\n", w->vals[i].node, w->vals[i].time);
133         }
134 }
135
136 /**
137  * Alloc a new workset on obstack @p ob with maximum size @p max
138  */
139 static workset_t *new_workset(void)
140 {
141         workset_t *res;
142         size_t     size = sizeof(*res) + n_regs * sizeof(res->vals[0]);
143
144         res  = obstack_alloc(&obst, size);
145         memset(res, 0, size);
146         return res;
147 }
148
149 /**
150  * Alloc a new instance on obstack and make it equal to @param workset
151  */
152 static workset_t *workset_clone(workset_t *workset)
153 {
154         workset_t *res;
155         size_t size = sizeof(*res) + n_regs * sizeof(res->vals[0]);
156         res = obstack_alloc(&obst, size);
157         memcpy(res, workset, size);
158         return res;
159 }
160
161 /**
162  * Copy workset @param src to @param tgt
163  */
164 static void workset_copy(workset_t *dest, const workset_t *src)
165 {
166         size_t size = sizeof(*src) + n_regs * sizeof(src->vals[0]);
167         memcpy(dest, src, size);
168 }
169
170 /**
171  * Overwrites the current content array of @param ws with the
172  * @param count locations given at memory @param locs.
173  * Set the length of @param ws to count.
174  */
175 static void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs)
176 {
177         workset->len = count;
178         memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
179 }
180
181 /**
182  * Inserts the value @p val into the workset, iff it is not
183  * already contained. The workset must not be full.
184  */
185 static void workset_insert(workset_t *workset, ir_node *val, bool spilled)
186 {
187         loc_t *loc;
188         int    i;
189         /* check for current regclass */
190         assert(arch_irn_consider_in_reg_alloc(arch_env, cls, val));
191
192         /* check if val is already contained */
193         for (i = 0; i < workset->len; ++i) {
194                 loc = &workset->vals[i];
195                 if (loc->node == val) {
196                         if (spilled) {
197                                 loc->spilled = true;
198                         }
199                         return;
200                 }
201         }
202
203         /* insert val */
204         assert(workset->len < n_regs && "Workset already full!");
205         loc           = &workset->vals[workset->len];
206         loc->node     = val;
207         loc->spilled  = spilled;
208         loc->time     = TIME_UNDEFINED;
209         workset->len++;
210 }
211
212 /**
213  * Removes all entries from this workset
214  */
215 static void workset_clear(workset_t *workset)
216 {
217         workset->len = 0;
218 }
219
220 /**
221  * Removes the value @p val from the workset if present.
222  */
223 static INLINE void workset_remove(workset_t *workset, ir_node *val)
224 {
225         int i;
226         for(i = 0; i < workset->len; ++i) {
227                 if (workset->vals[i].node == val) {
228                         workset->vals[i] = workset->vals[--workset->len];
229                         return;
230                 }
231         }
232 }
233
234 static INLINE const loc_t *workset_contains(const workset_t *ws,
235                                             const ir_node *val)
236 {
237         int i;
238
239         for (i = 0; i < ws->len; ++i) {
240                 if (ws->vals[i].node == val)
241                         return &ws->vals[i];
242         }
243
244         return NULL;
245 }
246
/**
 * Iterates over all values in the working set.
 *
 * After the loop has run to completion @p v is NULL and @p i equals the
 * length of the workset. All macro arguments are evaluated several times,
 * so they must not have side effects.
 *
 * @p ws The workset to iterate
 * @p v  A variable to put the current value in
 * @p i  An integer for internal use
 */
#define workset_foreach(ws, v, i) \
    for ((i) = 0; \
         (v) = ((i) < (ws)->len) ? (ws)->vals[i].node : NULL, (i) < (ws)->len; \
         ++(i))

/** Sets the time of entry @p i of workset @p ws to @p t. */
#define workset_set_time(ws, i, t)      ((ws)->vals[i].time = (t))
/** Yields the time of entry @p i of workset @p ws. */
#define workset_get_time(ws, i)         ((ws)->vals[i].time)
/** Sets the length of workset @p ws to @p length. */
#define workset_set_length(ws, length)  ((ws)->len = (length))
/** Yields the length of workset @p ws. */
#define workset_get_length(ws)          ((ws)->len)
/** Yields the value stored in entry @p i of workset @p ws. */
#define workset_get_val(ws, i)          ((ws)->vals[i].node)
/**
 * Sorts the entries of workset @p ws with loc_compare().
 * NOTE(review): the expansion ends in a semicolon of its own; it is kept so
 * that call sites written without one keep compiling.
 */
#define workset_sort(ws) qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare);
263
264 typedef struct _block_info_t
265 {
266         workset_t *start_workset;
267         workset_t *end_workset;
268 } block_info_t;
269
270
271 static void *new_block_info(void)
272 {
273         block_info_t *res = obstack_alloc(&obst, sizeof(res[0]));
274         memset(res, 0, sizeof(res[0]));
275
276         return res;
277 }
278
/* The block info of a block is kept in the link field of the block node. */
#define get_block_info(block)        ((block_info_t *)get_irn_link((block)))
#define set_block_info(block, info)  set_irn_link((block), (info))
281
282 /**
283  * @return The distance to the next use or 0 if irn has dont_spill flag set
284  */
285 static INLINE unsigned get_distance(ir_node *from, unsigned from_step,
286                                     const ir_node *def, int skip_from_uses)
287 {
288         be_next_use_t use;
289         int           flags = arch_irn_get_flags(arch_env, def);
290         unsigned      costs;
291         unsigned      time;
292
293         assert(! (flags & arch_irn_flags_ignore));
294
295         use  = be_get_next_use(uses, from, from_step, def, skip_from_uses);
296         time = use.time;
297         if (USES_IS_INFINITE(time))
298                 return USES_INFINITY;
299
300         /* We have to keep nonspillable nodes in the workingset */
301         if (flags & arch_irn_flags_dont_spill)
302                 return 0;
303
304         /* give some bonus to rematerialisable nodes */
305         if (remat_bonus > 0) {
306                 costs = be_get_reload_costs_no_weight(senv, def, use.before);
307                 assert(costs * remat_bonus < 1000);
308                 time  += 1000 - (costs * remat_bonus);
309         }
310
311         return time;
312 }
313
314 /**
315  * Performs the actions necessary to grant the request that:
316  * - new_vals can be held in registers
317  * - as few as possible other values are disposed
318  * - the worst values get disposed
319  *
320  * @p is_usage indicates that the values in new_vals are used (not defined)
321  * In this case reloads must be performed
322  */
323 static void displace(workset_t *new_vals, int is_usage)
324 {
325         ir_node **to_insert = alloca(n_regs * sizeof(to_insert[0]));
326         bool     *spilled   = alloca(n_regs * sizeof(spilled[0]));
327         ir_node  *val;
328         int       i;
329         int       len;
330         int       spills_needed;
331         int       demand;
332         int       iter;
333
334         /* 1. Identify the number of needed slots and the values to reload */
335         demand = 0;
336         workset_foreach(new_vals, val, iter) {
337                 bool reloaded = false;
338
339                 /* mark value as used */
340                 if (is_usage)
341                         ir_nodeset_insert(&used, val);
342
343                 if (! workset_contains(ws, val)) {
344                         DB((dbg, DBG_DECIDE, "    insert %+F\n", val));
345                         if (is_usage) {
346                                 DB((dbg, DBG_SPILL, "Reload %+F before %+F\n", val, instr));
347                                 be_add_reload(senv, val, instr, cls, 1);
348                                 reloaded = true;
349                         }
350                 } else {
351                         DB((dbg, DBG_DECIDE, "    %+F already in workset\n", val));
352                         assert(is_usage);
353                         /* remove the value from the current workset so it is not accidently
354                          * spilled */
355                         workset_remove(ws, val);
356                 }
357                 spilled[demand]   = reloaded;
358                 to_insert[demand] = val;
359                 ++demand;
360         }
361
362         /* 2. Make room for at least 'demand' slots */
363         len           = workset_get_length(ws);
364         spills_needed = len + demand - n_regs;
365         assert(spills_needed <= len);
366
367         /* Only make more free room if we do not have enough */
368         if (spills_needed > 0) {
369                 ir_node   *curr_bb  = NULL;
370                 workset_t *ws_start = NULL;
371
372                 if (move_spills) {
373                         curr_bb  = get_nodes_block(instr);
374                         ws_start = get_block_info(curr_bb)->start_workset;
375                 }
376
377                 DB((dbg, DBG_DECIDE, "    disposing %d values\n", spills_needed));
378
379                 /* calculate current next-use distance for live values */
380                 for (i = 0; i < len; ++i) {
381                         ir_node  *val  = workset_get_val(ws, i);
382                         unsigned  dist = get_distance(instr, instr_nr, val, !is_usage);
383                         workset_set_time(ws, i, dist);
384                 }
385
386                 /* sort entries by increasing nextuse-distance*/
387                 workset_sort(ws);
388
389                 for (i = len - spills_needed; i < len; ++i) {
390                         ir_node *val = ws->vals[i].node;
391
392                         DB((dbg, DBG_DECIDE, "    disposing node %+F (%u)\n", val,
393                              workset_get_time(ws, i)));
394
395                         if (move_spills) {
396                                 if (!USES_IS_INFINITE(ws->vals[i].time)
397                                                 && !ws->vals[i].spilled) {
398                                         ir_node *after_pos = sched_prev(instr);
399                                         DB((dbg, DBG_DECIDE, "Spill %+F after node %+F\n", val,
400                                                 after_pos));
401                                         be_add_spill(senv, val, after_pos);
402                                 }
403                         } else {
404                                 /* Logic for not needed live-ins: If a value is disposed
405                                  * before its first use, remove it from start workset
406                                  * We don't do this for phis though     */
407                                 if (!is_Phi(val) && ! ir_nodeset_contains(&used, val)) {
408                                         workset_remove(ws_start, val);
409                                         DB((dbg, DBG_DECIDE, "    (and removing %+F from start workset)\n", val));
410                                 }
411                         }
412                 }
413
414                 /* kill the last 'demand' entries in the array */
415                 workset_set_length(ws, len - spills_needed);
416         }
417
418         /* 3. Insert the new values into the workset */
419         for (i = 0; i < demand; ++i) {
420                 ir_node *val = to_insert[i];
421
422                 workset_insert(ws, val, spilled[i]);
423         }
424 }
425
/** Result codes describing in which predecessor worksets a value is found. */
enum {
    AVAILABLE_EVERYWHERE = 0, /**< found in every predecessor workset */
    AVAILABLE_NOWHERE    = 1, /**< found in no predecessor workset */
    AVAILABLE_PARTLY     = 2, /**< found in some predecessor worksets */
    AVAILABLE_UNKNOWN    = 3  /**< not all predecessor worksets are known */
};
432
433 static unsigned available_in_all_preds(workset_t* const* pred_worksets,
434                                        size_t n_pred_worksets,
435                                        const ir_node *value, bool is_local_phi)
436 {
437         size_t i;
438         bool   avail_everywhere = true;
439         bool   avail_nowhere    = true;
440
441         assert(n_pred_worksets > 0);
442
443         /* value available in all preds? */
444         for (i = 0; i < n_pred_worksets; ++i) {
445                 bool             found     = false;
446                 const workset_t *p_workset = pred_worksets[i];
447                 int              p_len     = workset_get_length(p_workset);
448                 int              p_i;
449                 const ir_node   *l_value;
450
451                 if (is_local_phi) {
452                         assert(is_Phi(value));
453                         l_value = get_irn_n(value, i);
454                 } else {
455                         l_value = value;
456                 }
457
458                 for (p_i = 0; p_i < p_len; ++p_i) {
459                         const loc_t *p_l = &p_workset->vals[p_i];
460                         if (p_l->node != l_value)
461                                 continue;
462
463                         found = true;
464                         break;
465                 }
466
467                 if (found) {
468                         avail_nowhere = false;
469                 } else {
470                         avail_everywhere = false;
471                 }
472         }
473
474         if (avail_everywhere) {
475                 assert(!avail_nowhere);
476                 return AVAILABLE_EVERYWHERE;
477         } else if (avail_nowhere) {
478                 return AVAILABLE_NOWHERE;
479         } else {
480                 return AVAILABLE_PARTLY;
481         }
482 }
483
484 /** Decides whether a specific node should be in the start workset or not
485  *
486  * @param env      belady environment
487  * @param first
488  * @param node     the node to test
489  * @param loop     the loop of the node
490  */
491 static loc_t to_take_or_not_to_take(ir_node* first, ir_node *node,
492                                     ir_loop *loop, unsigned available)
493 {
494         be_next_use_t next_use;
495         loc_t         loc;
496
497         loc.time    = USES_INFINITY;
498         loc.node    = node;
499         loc.spilled = false;
500
501         if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node)) {
502                 loc.time = USES_INFINITY;
503                 return loc;
504         }
505
506         /* We have to keep nonspillable nodes in the workingset */
507         if (arch_irn_get_flags(arch_env, node) & arch_irn_flags_dont_spill) {
508                 loc.time = 0;
509                 DB((dbg, DBG_START, "    %+F taken (dontspill node)\n", node, loc.time));
510                 return loc;
511         }
512
513         next_use = be_get_next_use(uses, first, 0, node, 0);
514         if (USES_IS_INFINITE(next_use.time)) {
515                 // the nodes marked as live in shouldn't be dead, so it must be a phi
516                 assert(is_Phi(node));
517                 loc.time = USES_INFINITY;
518                 DB((dbg, DBG_START, "    %+F not taken (dead)\n", node));
519                 return loc;
520         }
521
522         loc.time = next_use.time;
523
524         if (improve_known_preds) {
525                 if (available == AVAILABLE_EVERYWHERE) {
526                         DB((dbg, DBG_START, "    %+F taken (%u, live in all preds)\n",
527                             node, loc.time));
528                         return loc;
529                 } else if(available == AVAILABLE_NOWHERE) {
530                         DB((dbg, DBG_START, "    %+F not taken (%u, live in no pred)\n",
531                             node, loc.time));
532                         loc.time = USES_INFINITY;
533                         return loc;
534                 }
535         }
536
537         if (!respectloopdepth || next_use.outermost_loop >= get_loop_depth(loop)) {
538                 DB((dbg, DBG_START, "    %+F taken (%u, loop %d)\n", node, loc.time,
539                     next_use.outermost_loop));
540         } else {
541                 loc.time = USES_PENDING;
542                 DB((dbg, DBG_START, "    %+F delayed (outerdepth %d < loopdepth %d)\n",
543                     node, next_use.outermost_loop, get_loop_depth(loop)));
544         }
545
546         return loc;
547 }
548
549 /**
550  * Computes the start-workset for a block with multiple predecessors. We assume
551  * that at least 1 of the predeccesors is a back-edge which means we're at the
552  * beginning of a loop. We try to reload as much values as possible now so they
553  * don't get reloaded inside the loop.
554  */
555 static void decide_start_workset(const ir_node *block)
556 {
557         ir_loop    *loop = get_irn_loop(block);
558         ir_node    *first;
559         ir_node    *node;
560         loc_t       loc;
561         loc_t      *starters;
562         loc_t      *delayed;
563         int         i, len, ws_count;
564         int             free_slots, free_pressure_slots;
565         unsigned    pressure;
566         int         arity;
567         workset_t **pred_worksets;
568         bool        all_preds_known;
569
570         /* check predecessors */
571         arity           = get_irn_arity(block);
572         pred_worksets   = alloca(sizeof(pred_worksets[0]) * arity);
573         all_preds_known = true;
574         for(i = 0; i < arity; ++i) {
575                 ir_node      *pred_block = get_Block_cfgpred_block(block, i);
576                 block_info_t *pred_info  = get_block_info(pred_block);
577
578                 if (pred_info == NULL) {
579                         pred_worksets[i] = NULL;
580                         all_preds_known  = false;
581                 } else {
582                         pred_worksets[i] = pred_info->end_workset;
583                 }
584         }
585
586         /* Collect all values living at start of block */
587         starters = NEW_ARR_F(loc_t, 0);
588         delayed  = NEW_ARR_F(loc_t, 0);
589
590         DB((dbg, DBG_START, "Living at start of %+F:\n", block));
591         first = sched_first(block);
592
593         /* check all Phis first */
594         sched_foreach(block, node) {
595                 unsigned available;
596
597                 if (! is_Phi(node))
598                         break;
599                 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node))
600                         continue;
601
602                 if (all_preds_known) {
603                         available = available_in_all_preds(pred_worksets, arity, node, true);
604                 } else {
605                         available = AVAILABLE_UNKNOWN;
606                 }
607
608                 loc = to_take_or_not_to_take(first, node, loop, available);
609
610                 if (! USES_IS_INFINITE(loc.time)) {
611                         if (USES_IS_PENDING(loc.time))
612                                 ARR_APP1(loc_t, delayed, loc);
613                         else
614                                 ARR_APP1(loc_t, starters, loc);
615                 } else {
616                         be_spill_phi(senv, node);
617                 }
618         }
619
620         /* check all Live-Ins */
621         be_lv_foreach(lv, block, be_lv_state_in, i) {
622                 ir_node *node = be_lv_get_irn(lv, block, i);
623                 unsigned available;
624
625                 if (all_preds_known) {
626                         available = available_in_all_preds(pred_worksets, arity, node, false);
627                 } else {
628                         available = AVAILABLE_UNKNOWN;
629                 }
630
631                 loc = to_take_or_not_to_take(first, node, loop, available);
632
633                 if (! USES_IS_INFINITE(loc.time)) {
634                         if (USES_IS_PENDING(loc.time))
635                                 ARR_APP1(loc_t, delayed, loc);
636                         else
637                                 ARR_APP1(loc_t, starters, loc);
638                 }
639         }
640
641         pressure            = be_get_loop_pressure(loop_ana, cls, loop);
642         assert(ARR_LEN(delayed) <= (signed)pressure);
643         free_slots          = n_regs - ARR_LEN(starters);
644         free_pressure_slots = n_regs - (pressure - ARR_LEN(delayed));
645         free_slots          = MIN(free_slots, free_pressure_slots);
646
647         /* so far we only put nodes into the starters list that are used inside
648          * the loop. If register pressure in the loop is low then we can take some
649          * values and let them live through the loop */
650         DB((dbg, DBG_START, "Loop pressure %d, taking %d delayed vals\n",
651             pressure, free_slots));
652         if (free_slots > 0) {
653                 qsort(delayed, ARR_LEN(delayed), sizeof(delayed[0]), loc_compare);
654
655                 for (i = 0; i < ARR_LEN(delayed) && free_slots > 0; ++i) {
656                         int    p, arity;
657                         loc_t *loc = & delayed[i];
658
659                         /* don't use values which are dead in a known predecessors
660                          * to not induce unnecessary reloads */
661                         arity = get_irn_arity(block);
662                         for (p = 0; p < arity; ++p) {
663                                 ir_node      *pred_block = get_Block_cfgpred_block(block, p);
664                                 block_info_t *pred_info  = get_block_info(pred_block);
665
666                                 if (pred_info == NULL)
667                                         continue;
668
669                                 if (!workset_contains(pred_info->end_workset, loc->node)) {
670                                         DB((dbg, DBG_START,
671                                             "    delayed %+F not live at pred %+F\n", loc->node,
672                                             pred_block));
673                                         goto skip_delayed;
674                                 }
675                         }
676
677                         DB((dbg, DBG_START, "    delayed %+F taken\n", loc->node));
678                         ARR_APP1(loc_t, starters, *loc);
679                         loc->node = NULL;
680                         --free_slots;
681                 skip_delayed:
682                         ;
683                 }
684         }
685
686         /* spill phis (the actual phis not just their values) that are in this block
687          * but not in the start workset */
688         for (i = ARR_LEN(delayed) - 1; i >= 0; --i) {
689                 ir_node *node = delayed[i].node;
690                 if (node == NULL || !is_Phi(node) || get_nodes_block(node) != block)
691                         continue;
692
693                 DB((dbg, DBG_START, "    spilling delayed phi %+F\n", node));
694                 be_spill_phi(senv, node);
695         }
696         DEL_ARR_F(delayed);
697
698         /* Sort start values by first use */
699         qsort(starters, ARR_LEN(starters), sizeof(starters[0]), loc_compare);
700
701         /* Copy the best ones from starters to start workset */
702         ws_count = MIN(ARR_LEN(starters), n_regs);
703         workset_clear(ws);
704         workset_bulk_fill(ws, ws_count, starters);
705
706         /* spill phis (the actual phis not just their values) that are in this block
707          * but not in the start workset */
708         len = ARR_LEN(starters);
709         for (i = ws_count; i < len; ++i) {
710                 ir_node *node = starters[i].node;
711                 if (! is_Phi(node) || get_nodes_block(node) != block)
712                         continue;
713
714                 DB((dbg, DBG_START, "    spilling phi %+F\n", node));
715                 be_spill_phi(senv, node);
716         }
717
718         DEL_ARR_F(starters);
719
720         /* determine spill status of the values: If there's 1 pred block (which
721          * is no backedge) where the value is spilled then we must set it to
722          * spilled here. */
723         for(i = 0; i < ws_count; ++i) {
724                 loc_t   *loc     = &ws->vals[i];
725                 ir_node *value   = loc->node;
726                 bool     spilled;
727                 int      n;
728
729                 /* phis from this block aren't spilled */
730                 if (get_nodes_block(value) == block) {
731                         assert(is_Phi(value));
732                         loc->spilled = false;
733                         continue;
734                 }
735
736                 /* determine if value was spilled on any predecessor */
737                 spilled = false;
738                 for(n = 0; n < arity; ++n) {
739                         workset_t *pred_workset = pred_worksets[n];
740                         int        p_len;
741                         int        p;
742
743                         if (pred_workset == NULL)
744                                 continue;
745
746                         p_len = workset_get_length(pred_workset);
747                         for(p = 0; p < p_len; ++p) {
748                                 loc_t *l = &pred_workset->vals[p];
749
750                                 if (l->node != value)
751                                         continue;
752
753                                 if (l->spilled) {
754                                         spilled = true;
755                                 }
756                                 break;
757                         }
758                 }
759
760                 loc->spilled = spilled;
761         }
762 }
763
764 /**
765  * For the given block @p block, decide for each values
766  * whether it is used from a register or is reloaded
767  * before the use.
768  */
769 static void belady(ir_node *block)
770 {
771         workset_t       *new_vals;
772         ir_node         *irn;
773         int              iter;
774         block_info_t    *block_info;
775         int              i, arity;
776         int              has_backedges = 0;
777         //int              first         = 0;
778         const ir_edge_t *edge;
779
780         /* no need to process a block twice */
781         if (get_block_info(block) != NULL) {
782                 return;
783         }
784
785         /* check if all predecessor blocks are processed yet (though for backedges
786          * we have to make an exception as we can't process them first) */
787         arity = get_Block_n_cfgpreds(block);
788         for(i = 0; i < arity; ++i) {
789                 ir_node      *pred_block = get_Block_cfgpred_block(block, i);
790                 block_info_t *pred_info  = get_block_info(pred_block);
791
792                 if (pred_info == NULL) {
793                         /* process predecessor first (it will be in the queue already) */
794                         if (!is_backedge(block, i)) {
795                                 return;
796                         }
797                         has_backedges = 1;
798                 }
799         }
800         (void) has_backedges;
801         if (arity == 0) {
802                 workset_clear(ws);
803         } else if (arity == 1) {
804                 ir_node      *pred_block = get_Block_cfgpred_block(block, 0);
805                 block_info_t *pred_info  = get_block_info(pred_block);
806
807                 assert(pred_info != NULL);
808                 workset_copy(ws, pred_info->end_workset);
809         } else {
810                 /* we need 2 heuristics here, for the case when all predecessor blocks
811                  * are known and when some are backedges (and therefore can't be known
812                  * yet) */
813                 decide_start_workset(block);
814         }
815
816         DB((dbg, DBG_DECIDE, "\n"));
817         DB((dbg, DBG_DECIDE, "Decide for %+F\n", block));
818
819         block_info = new_block_info();
820         set_block_info(block, block_info);
821
822         DB((dbg, DBG_WSETS, "Start workset for %+F:\n", block));
823         workset_foreach(ws, irn, iter) {
824                 DB((dbg, DBG_WSETS, "  %+F (%u)\n", irn,
825                      workset_get_time(ws, iter)));
826         }
827
828         block_info->start_workset = workset_clone(ws);
829
830         /* process the block from start to end */
831         DB((dbg, DBG_WSETS, "Processing...\n"));
832         ir_nodeset_init(&used);
833         instr_nr = 0;
834         /* TODO: this leaks (into the obstack)... */
835         new_vals = new_workset();
836
837         sched_foreach(block, irn) {
838                 int i, arity;
839                 assert(workset_get_length(ws) <= n_regs);
840
841                 /* Phis are no real instr (see insert_starters()) */
842                 if (is_Phi(irn)) {
843                         continue;
844                 }
845                 DB((dbg, DBG_DECIDE, "  ...%+F\n", irn));
846
847                 /* set instruction in the workset */
848                 instr = irn;
849
850                 /* allocate all values _used_ by this instruction */
851                 workset_clear(new_vals);
852                 for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
853                         ir_node *in = get_irn_n(irn, i);
854                         if (!arch_irn_consider_in_reg_alloc(arch_env, cls, in))
855                                 continue;
856
857                         /* (note that "spilled" is irrelevant here) */
858                         workset_insert(new_vals, in, false);
859                 }
860                 displace(new_vals, 1);
861
862                 /* allocate all values _defined_ by this instruction */
863                 workset_clear(new_vals);
864                 if (get_irn_mode(irn) == mode_T) {
865                         const ir_edge_t *edge;
866
867                         foreach_out_edge(irn, edge) {
868                                 ir_node *proj = get_edge_src_irn(edge);
869                                 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, proj))
870                                         continue;
871                                 workset_insert(new_vals, proj, false);
872                         }
873                 } else {
874                         if (!arch_irn_consider_in_reg_alloc(arch_env, cls, irn))
875                                 continue;
876                         workset_insert(new_vals, irn, false);
877                 }
878                 displace(new_vals, 0);
879
880                 instr_nr++;
881         }
882         ir_nodeset_destroy(&used);
883
884         /* Remember end-workset for this block */
885         block_info->end_workset = workset_clone(ws);
886         DB((dbg, DBG_WSETS, "End workset for %+F:\n", block));
887         workset_foreach(ws, irn, iter)
888                 DB((dbg, DBG_WSETS, "  %+F (%u)\n", irn,
889                      workset_get_time(ws, iter)));
890
891         /* add successor blocks into worklist */
892         foreach_block_succ(block, edge) {
893                 ir_node *succ = get_edge_src_irn(edge);
894                 pdeq_putr(worklist, succ);
895         }
896 }
897
898 /**
899  * 'decide' is block-local and makes assumptions
900  * about the set of live-ins. Thus we must adapt the
901  * live-outs to the live-ins at each block-border.
902  */
903 static void fix_block_borders(ir_node *block, void *data)
904 {
905         workset_t    *start_workset;
906         int           arity;
907         int           i;
908         int           iter;
909         (void) data;
910
911         DB((dbg, DBG_FIX, "\n"));
912         DB((dbg, DBG_FIX, "Fixing %+F\n", block));
913
914         start_workset = get_block_info(block)->start_workset;
915
916         /* process all pred blocks */
917         arity = get_irn_arity(block);
918         for (i = 0; i < arity; ++i) {
919                 ir_node   *pred = get_Block_cfgpred_block(block, i);
920                 workset_t *pred_end_workset = get_block_info(pred)->end_workset;
921                 ir_node   *node;
922
923                 DB((dbg, DBG_FIX, "  Pred %+F\n", pred));
924
925                 /* spill all values not used anymore */
926                 workset_foreach(pred_end_workset, node, iter) {
927                         ir_node *n2;
928                         int      iter2;
929                         bool     found = false;
930                         workset_foreach(start_workset, n2, iter2) {
931                                 if (n2 == node) {
932                                         found = true;
933                                         break;
934                                 }
935                                 /* note that we do not look at phi inputs, becuase the values
936                                  * will be either live-end and need no spill or
937                                  * they have other users in which must be somewhere else in the
938                                  * workset */
939                         }
940
941                         if (found)
942                                 continue;
943
944                         if (move_spills && be_is_live_in(lv, block, node)
945                                         && !pred_end_workset->vals[iter].spilled) {
946                                 ir_node *insert_point;
947                                 if (arity > 1) {
948                                         insert_point = be_get_end_of_block_insertion_point(pred);
949                                         insert_point = sched_prev(insert_point);
950                                 } else {
951                                         insert_point = block;
952                                 }
953                                 DB((dbg, DBG_SPILL, "Spill %+F after %+F\n", node,
954                                      insert_point));
955                                 be_add_spill(senv, node, insert_point);
956                         }
957                 }
958
959                 /* reload missing values in predecessors, add missing spills */
960                 workset_foreach(start_workset, node, iter) {
961                         const loc_t *l    = &start_workset->vals[iter];
962                         const loc_t *pred_loc;
963
964                         /* if node is a phi of the current block we reload
965                          * the corresponding argument, else node itself */
966                         if (is_Phi(node) && get_nodes_block(node) == block) {
967                                 node = get_irn_n(node, i);
968                                 assert(!l->spilled);
969
970                                 /* we might have unknowns as argument for the phi */
971                                 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node))
972                                         continue;
973                         }
974
975                         /* check if node is in a register at end of pred */
976                         pred_loc = workset_contains(pred_end_workset, node);
977                         if (pred_loc != NULL) {
978                                 /* we might have to spill value on this path */
979                                 if (move_spills && !pred_loc->spilled && l->spilled) {
980                                         ir_node *insert_point
981                                                 = be_get_end_of_block_insertion_point(pred);
982                                         insert_point = sched_prev(insert_point);
983                                         DB((dbg, DBG_SPILL, "Spill %+F after %+F\n", node,
984                                             insert_point));
985                                         be_add_spill(senv, node, insert_point);
986                                 }
987                         } else {
988                                 /* node is not in register at the end of pred -> reload it */
989                                 DB((dbg, DBG_FIX, "    reload %+F\n", node));
990                                 DB((dbg, DBG_SPILL, "Reload %+F before %+F,%d\n", node, block, i));
991                                 be_add_reload_on_edge(senv, node, block, i, cls, 1);
992                         }
993                 }
994         }
995 }
996
997 static void be_spill_belady(be_irg_t *birg, const arch_register_class_t *rcls)
998 {
999         ir_graph *irg = be_get_birg_irg(birg);
1000
1001         be_liveness_assure_sets(be_assure_liveness(birg));
1002
1003         /* construct control flow loop tree */
1004         if (! (get_irg_loopinfo_state(irg) & loopinfo_cf_consistent)) {
1005                 construct_cf_backedges(irg);
1006         }
1007
1008         be_clear_links(irg);
1009
1010         /* init belady env */
1011         obstack_init(&obst);
1012         arch_env = birg->main_env->arch_env;
1013         cls      = rcls;
1014         lv       = be_get_birg_liveness(birg);
1015         n_regs   = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
1016         ws       = new_workset();
1017         uses     = be_begin_uses(irg, lv);
1018         loop_ana = be_new_loop_pressure(birg);
1019         senv     = be_new_spill_env(birg);
1020         worklist = new_pdeq();
1021
1022         pdeq_putr(worklist, get_irg_start_block(irg));
1023
1024         while(!pdeq_empty(worklist)) {
1025                 ir_node *block = pdeq_getl(worklist);
1026                 belady(block);
1027         }
1028         /* end block might not be reachable in endless loops */
1029         belady(get_irg_end_block(irg));
1030
1031         del_pdeq(worklist);
1032
1033         /* belady was block-local, fix the global flow by adding reloads on the
1034          * edges */
1035         irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);
1036
1037         /* Insert spill/reload nodes into the graph and fix usages */
1038         be_insert_spills_reloads(senv);
1039
1040         /* clean up */
1041         be_delete_spill_env(senv);
1042         be_end_uses(uses);
1043         be_free_loop_pressure(loop_ana);
1044         obstack_free(&obst, NULL);
1045 }
1046
1047 void be_init_spillbelady(void)
1048 {
1049         static be_spiller_t belady_spiller = {
1050                 be_spill_belady
1051         };
1052         lc_opt_entry_t *be_grp       = lc_opt_get_grp(firm_opt_get_root(), "be");
1053         lc_opt_entry_t *belady_group = lc_opt_get_grp(be_grp, "belady");
1054         lc_opt_add_table(belady_group, options);
1055
1056         be_register_spiller("belady", &belady_spiller);
1057         FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady");
1058 }
1059
1060 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady);