LC_OPT_ENT_BOOL needs an int
[libfirm] / ir / be / bespillbelady.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Beladys spillalgorithm.
23  * @author      Daniel Grund, Matthias Braun
24  * @date        20.09.2005
25  * @version     $Id$
26  */
27 #include "config.h"
28
29 #include <stdbool.h>
30
31 #include "obst.h"
32 #include "irprintf_t.h"
33 #include "irgraph.h"
34 #include "irnode.h"
35 #include "irmode.h"
36 #include "irgwalk.h"
37 #include "irloop.h"
38 #include "iredges_t.h"
39 #include "ircons_t.h"
40 #include "irprintf.h"
41 #include "irnodeset.h"
42
43 #include "beutil.h"
44 #include "bearch.h"
45 #include "beuses.h"
46 #include "besched.h"
47 #include "beirgmod.h"
48 #include "belive_t.h"
49 #include "benode_t.h"
50 #include "bechordal_t.h"
51 #include "bespill.h"
52 #include "beloopana.h"
53 #include "beirg.h"
54 #include "bespillutil.h"
55 #include "bemodule.h"
56
57 #define DBG_SPILL     1
58 #define DBG_WSETS     2
59 #define DBG_FIX       4
60 #define DBG_DECIDE    8
61 #define DBG_START    16
62 #define DBG_SLOTS    32
63 #define DBG_TRACE    64
64 #define DBG_WORKSET 128
65 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
66
67 #define TIME_UNDEFINED 6666
68
69 //#define LOOK_AT_LOOPDEPTH
70
71 /**
72  * An association between a node and a point in time.
73  */
74 typedef struct loc_t {
75         ir_node          *node;
76         unsigned          time;     /**< A use time (see beuses.h). */
77         bool              spilled;  /**< the value was already spilled on this path */
78 } loc_t;
79
80 typedef struct _workset_t {
81         int   len;          /**< current length */
82         loc_t vals[0];      /**< inlined array of the values/distances in this working set */
83 } workset_t;
84
85 static struct obstack               obst;
86 static const arch_register_class_t *cls;
87 static const be_lv_t               *lv;
88 static be_loopana_t                *loop_ana;
89 static int                          n_regs;
90 static workset_t                   *ws;     /**< the main workset used while
91                                                      processing a block. */
92 static be_uses_t                   *uses;   /**< env for the next-use magic */
93 static ir_node                     *instr;  /**< current instruction */
94 static unsigned                     instr_nr; /**< current instruction number
95                                                        (relative to block start) */
96 static spill_env_t                 *senv;   /**< see bespill.h */
97 static ir_node                    **blocklist;
98
99 static int                          move_spills      = true;
100 static int                          respectloopdepth = true;
101 static int                          improve_known_preds = true;
102 /* factor to weight the different costs of reloading/rematerializing a node
103    (see bespill.h be_get_reload_costs_no_weight) */
104 static int                          remat_bonus      = 10;
105
106 static const lc_opt_table_entry_t options[] = {
107         LC_OPT_ENT_BOOL   ("movespills", "try to move spills out of loops", &move_spills),
108         LC_OPT_ENT_BOOL   ("respectloopdepth", "exprimental (outermost loop cutting)", &respectloopdepth),
109         LC_OPT_ENT_BOOL   ("improveknownpreds", "experimental (known preds cutting)", &improve_known_preds),
110         LC_OPT_ENT_INT    ("rematbonus", "give bonus to rematerialisable nodes", &remat_bonus),
111         LC_OPT_LAST
112 };
113
114 static int loc_compare(const void *a, const void *b)
115 {
116         const loc_t *p = a;
117         const loc_t *q = b;
118         return p->time - q->time;
119 }
120
121 void workset_print(const workset_t *w)
122 {
123         int i;
124
125         for(i = 0; i < w->len; ++i) {
126                 ir_fprintf(stderr, "%+F %d\n", w->vals[i].node, w->vals[i].time);
127         }
128 }
129
130 /**
131  * Alloc a new workset on obstack @p ob with maximum size @p max
132  */
133 static workset_t *new_workset(void)
134 {
135         return OALLOCFZ(&obst, workset_t, vals, n_regs);
136 }
137
138 /**
139  * Alloc a new instance on obstack and make it equal to @param workset
140  */
141 static workset_t *workset_clone(workset_t *workset)
142 {
143         workset_t *res = OALLOCF(&obst, workset_t, vals, n_regs);
144         memcpy(res, workset, sizeof(*res) + n_regs * sizeof(res->vals[0]));
145         return res;
146 }
147
148 /**
149  * Copy workset @param src to @param tgt
150  */
151 static void workset_copy(workset_t *dest, const workset_t *src)
152 {
153         size_t size = sizeof(*src) + n_regs * sizeof(src->vals[0]);
154         memcpy(dest, src, size);
155 }
156
157 /**
158  * Overwrites the current content array of @param ws with the
159  * @param count locations given at memory @param locs.
160  * Set the length of @param ws to count.
161  */
162 static void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs)
163 {
164         workset->len = count;
165         memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
166 }
167
168 /**
169  * Inserts the value @p val into the workset, iff it is not
170  * already contained. The workset must not be full.
171  */
172 static void workset_insert(workset_t *workset, ir_node *val, bool spilled)
173 {
174         loc_t *loc;
175         int    i;
176         /* check for current regclass */
177         assert(arch_irn_consider_in_reg_alloc(cls, val));
178
179         /* check if val is already contained */
180         for (i = 0; i < workset->len; ++i) {
181                 loc = &workset->vals[i];
182                 if (loc->node == val) {
183                         if (spilled) {
184                                 loc->spilled = true;
185                         }
186                         return;
187                 }
188         }
189
190         /* insert val */
191         assert(workset->len < n_regs && "Workset already full!");
192         loc           = &workset->vals[workset->len];
193         loc->node     = val;
194         loc->spilled  = spilled;
195         loc->time     = TIME_UNDEFINED;
196         workset->len++;
197 }
198
199 /**
200  * Removes all entries from this workset
201  */
202 static void workset_clear(workset_t *workset)
203 {
204         workset->len = 0;
205 }
206
207 /**
208  * Removes the value @p val from the workset if present.
209  */
210 static inline void workset_remove(workset_t *workset, ir_node *val)
211 {
212         int i;
213         for(i = 0; i < workset->len; ++i) {
214                 if (workset->vals[i].node == val) {
215                         workset->vals[i] = workset->vals[--workset->len];
216                         return;
217                 }
218         }
219 }
220
221 static inline const loc_t *workset_contains(const workset_t *ws,
222                                             const ir_node *val)
223 {
224         int i;
225
226         for (i = 0; i < ws->len; ++i) {
227                 if (ws->vals[i].node == val)
228                         return &ws->vals[i];
229         }
230
231         return NULL;
232 }
233
234 /**
235  * Iterates over all values in the working set.
236  * @p ws The workset to iterate
237  * @p v  A variable to put the current value in
238  * @p i  An integer for internal use
239  */
240 #define workset_foreach(ws, v, i)       for(i=0; \
241                                                                                 v=(i < ws->len) ? ws->vals[i].node : NULL, i < ws->len; \
242                                                                                 ++i)
243
244 #define workset_set_time(ws, i, t) (ws)->vals[i].time=t
245 #define workset_get_time(ws, i) (ws)->vals[i].time
246 #define workset_set_length(ws, length) (ws)->len = length
247 #define workset_get_length(ws) ((ws)->len)
248 #define workset_get_val(ws, i) ((ws)->vals[i].node)
249 #define workset_sort(ws) qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare);
250
251 typedef struct _block_info_t
252 {
253         workset_t *start_workset;
254         workset_t *end_workset;
255 } block_info_t;
256
257
258 static block_info_t *new_block_info(void)
259 {
260         return OALLOCZ(&obst, block_info_t);
261 }
262
263 #define get_block_info(block)        ((block_info_t *)get_irn_link(block))
264 #define set_block_info(block, info)  set_irn_link(block, info)
265
266 /**
267  * @return The distance to the next use or 0 if irn has dont_spill flag set
268  */
269 static inline unsigned get_distance(ir_node *from, unsigned from_step,
270                                     const ir_node *def, int skip_from_uses)
271 {
272         be_next_use_t use;
273         unsigned      costs;
274         unsigned      time;
275
276         assert(!arch_irn_is_ignore(def));
277
278         use  = be_get_next_use(uses, from, from_step, def, skip_from_uses);
279         time = use.time;
280         if (USES_IS_INFINITE(time))
281                 return USES_INFINITY;
282
283         /* We have to keep nonspillable nodes in the workingset */
284         if (arch_irn_get_flags(skip_Proj_const(def)) & arch_irn_flags_dont_spill)
285                 return 0;
286
287         /* give some bonus to rematerialisable nodes */
288         if (remat_bonus > 0) {
289                 costs = be_get_reload_costs_no_weight(senv, def, use.before);
290                 assert(costs * remat_bonus < 1000);
291                 time  += 1000 - (costs * remat_bonus);
292         }
293
294         return time;
295 }
296
297 /**
298  * Performs the actions necessary to grant the request that:
299  * - new_vals can be held in registers
300  * - as few as possible other values are disposed
301  * - the worst values get disposed
302  *
303  * @p is_usage indicates that the values in new_vals are used (not defined)
304  * In this case reloads must be performed
305  */
306 static void displace(workset_t *new_vals, int is_usage)
307 {
308         ir_node **to_insert = ALLOCAN(ir_node*, n_regs);
309         bool     *spilled   = ALLOCAN(bool,     n_regs);
310         ir_node  *val;
311         int       i;
312         int       len;
313         int       spills_needed;
314         int       demand;
315         int       iter;
316
317         /* 1. Identify the number of needed slots and the values to reload */
318         demand = 0;
319         workset_foreach(new_vals, val, iter) {
320                 bool reloaded = false;
321
322                 if (! workset_contains(ws, val)) {
323                         DB((dbg, DBG_DECIDE, "    insert %+F\n", val));
324                         if (is_usage) {
325                                 DB((dbg, DBG_SPILL, "Reload %+F before %+F\n", val, instr));
326                                 be_add_reload(senv, val, instr, cls, 1);
327                                 reloaded = true;
328                         }
329                 } else {
330                         DB((dbg, DBG_DECIDE, "    %+F already in workset\n", val));
331                         assert(is_usage);
332                         /* remove the value from the current workset so it is not accidently
333                          * spilled */
334                         workset_remove(ws, val);
335                 }
336                 spilled[demand]   = reloaded;
337                 to_insert[demand] = val;
338                 ++demand;
339         }
340
341         /* 2. Make room for at least 'demand' slots */
342         len           = workset_get_length(ws);
343         spills_needed = len + demand - n_regs;
344         assert(spills_needed <= len);
345
346         /* Only make more free room if we do not have enough */
347         if (spills_needed > 0) {
348                 ir_node   *curr_bb  = NULL;
349                 workset_t *ws_start = NULL;
350
351                 if (move_spills) {
352                         curr_bb  = get_nodes_block(instr);
353                         ws_start = get_block_info(curr_bb)->start_workset;
354                 }
355
356                 DB((dbg, DBG_DECIDE, "    disposing %d values\n", spills_needed));
357
358                 /* calculate current next-use distance for live values */
359                 for (i = 0; i < len; ++i) {
360                         ir_node  *val  = workset_get_val(ws, i);
361                         unsigned  dist = get_distance(instr, instr_nr, val, !is_usage);
362                         workset_set_time(ws, i, dist);
363                 }
364
365                 /* sort entries by increasing nextuse-distance*/
366                 workset_sort(ws);
367
368                 for (i = len - spills_needed; i < len; ++i) {
369                         ir_node *val = ws->vals[i].node;
370
371                         DB((dbg, DBG_DECIDE, "    disposing node %+F (%u)\n", val,
372                              workset_get_time(ws, i)));
373
374                         if (move_spills) {
375                                 if (!USES_IS_INFINITE(ws->vals[i].time)
376                                                 && !ws->vals[i].spilled) {
377                                         ir_node *after_pos = sched_prev(instr);
378                                         DB((dbg, DBG_DECIDE, "Spill %+F after node %+F\n", val,
379                                                 after_pos));
380                                         be_add_spill(senv, val, after_pos);
381                                 }
382                         }
383                 }
384
385                 /* kill the last 'demand' entries in the array */
386                 workset_set_length(ws, len - spills_needed);
387         }
388
389         /* 3. Insert the new values into the workset */
390         for (i = 0; i < demand; ++i) {
391                 ir_node *val = to_insert[i];
392
393                 workset_insert(ws, val, spilled[i]);
394         }
395 }
396
397 enum {
398         AVAILABLE_EVERYWHERE,
399         AVAILABLE_NOWHERE,
400         AVAILABLE_PARTLY,
401         AVAILABLE_UNKNOWN
402 };
403
404 static unsigned available_in_all_preds(workset_t* const* pred_worksets,
405                                        size_t n_pred_worksets,
406                                        const ir_node *value, bool is_local_phi)
407 {
408         size_t i;
409         bool   avail_everywhere = true;
410         bool   avail_nowhere    = true;
411
412         assert(n_pred_worksets > 0);
413
414         /* value available in all preds? */
415         for (i = 0; i < n_pred_worksets; ++i) {
416                 bool             found     = false;
417                 const workset_t *p_workset = pred_worksets[i];
418                 int              p_len     = workset_get_length(p_workset);
419                 int              p_i;
420                 const ir_node   *l_value;
421
422                 if (is_local_phi) {
423                         assert(is_Phi(value));
424                         l_value = get_irn_n(value, i);
425                 } else {
426                         l_value = value;
427                 }
428
429                 for (p_i = 0; p_i < p_len; ++p_i) {
430                         const loc_t *p_l = &p_workset->vals[p_i];
431                         if (p_l->node != l_value)
432                                 continue;
433
434                         found = true;
435                         break;
436                 }
437
438                 if (found) {
439                         avail_nowhere = false;
440                 } else {
441                         avail_everywhere = false;
442                 }
443         }
444
445         if (avail_everywhere) {
446                 assert(!avail_nowhere);
447                 return AVAILABLE_EVERYWHERE;
448         } else if (avail_nowhere) {
449                 return AVAILABLE_NOWHERE;
450         } else {
451                 return AVAILABLE_PARTLY;
452         }
453 }
454
455 /** Decides whether a specific node should be in the start workset or not
456  *
457  * @param env      belady environment
458  * @param first
459  * @param node     the node to test
460  * @param loop     the loop of the node
461  */
462 static loc_t to_take_or_not_to_take(ir_node* first, ir_node *node,
463                                     ir_loop *loop, unsigned available)
464 {
465         be_next_use_t next_use;
466         loc_t         loc;
467
468         loc.time    = USES_INFINITY;
469         loc.node    = node;
470         loc.spilled = false;
471
472         if (!arch_irn_consider_in_reg_alloc(cls, node)) {
473                 loc.time = USES_INFINITY;
474                 return loc;
475         }
476
477         /* We have to keep nonspillable nodes in the workingset */
478         if (arch_irn_get_flags(skip_Proj_const(node)) & arch_irn_flags_dont_spill) {
479                 loc.time = 0;
480                 DB((dbg, DBG_START, "    %+F taken (dontspill node)\n", node, loc.time));
481                 return loc;
482         }
483
484         next_use = be_get_next_use(uses, first, 0, node, 0);
485         if (USES_IS_INFINITE(next_use.time)) {
486                 // the nodes marked as live in shouldn't be dead, so it must be a phi
487                 assert(is_Phi(node));
488                 loc.time = USES_INFINITY;
489                 DB((dbg, DBG_START, "    %+F not taken (dead)\n", node));
490                 return loc;
491         }
492
493         loc.time = next_use.time;
494
495         if (improve_known_preds) {
496                 if (available == AVAILABLE_EVERYWHERE) {
497                         DB((dbg, DBG_START, "    %+F taken (%u, live in all preds)\n",
498                             node, loc.time));
499                         return loc;
500                 } else if(available == AVAILABLE_NOWHERE) {
501                         DB((dbg, DBG_START, "    %+F not taken (%u, live in no pred)\n",
502                             node, loc.time));
503                         loc.time = USES_INFINITY;
504                         return loc;
505                 }
506         }
507
508         if (!respectloopdepth || next_use.outermost_loop >= get_loop_depth(loop)) {
509                 DB((dbg, DBG_START, "    %+F taken (%u, loop %d)\n", node, loc.time,
510                     next_use.outermost_loop));
511         } else {
512                 loc.time = USES_PENDING;
513                 DB((dbg, DBG_START, "    %+F delayed (outerdepth %d < loopdepth %d)\n",
514                     node, next_use.outermost_loop, get_loop_depth(loop)));
515         }
516
517         return loc;
518 }
519
520 /**
521  * Computes the start-workset for a block with multiple predecessors. We assume
522  * that at least 1 of the predeccesors is a back-edge which means we're at the
523  * beginning of a loop. We try to reload as much values as possible now so they
524  * don't get reloaded inside the loop.
525  */
526 static void decide_start_workset(const ir_node *block)
527 {
528         ir_loop    *loop = get_irn_loop(block);
529         ir_node    *first;
530         ir_node    *node;
531         loc_t       loc;
532         loc_t      *starters;
533         loc_t      *delayed;
534         int         i, len, ws_count;
535         int             free_slots, free_pressure_slots;
536         unsigned    pressure;
537         int         arity;
538         workset_t **pred_worksets;
539         bool        all_preds_known;
540
541         /* check predecessors */
542         arity           = get_irn_arity(block);
543         pred_worksets   = ALLOCAN(workset_t*, arity);
544         all_preds_known = true;
545         for(i = 0; i < arity; ++i) {
546                 ir_node      *pred_block = get_Block_cfgpred_block(block, i);
547                 block_info_t *pred_info  = get_block_info(pred_block);
548
549                 if (pred_info == NULL) {
550                         pred_worksets[i] = NULL;
551                         all_preds_known  = false;
552                 } else {
553                         pred_worksets[i] = pred_info->end_workset;
554                 }
555         }
556
557         /* Collect all values living at start of block */
558         starters = NEW_ARR_F(loc_t, 0);
559         delayed  = NEW_ARR_F(loc_t, 0);
560
561         DB((dbg, DBG_START, "Living at start of %+F:\n", block));
562         first = sched_first(block);
563
564         /* check all Phis first */
565         sched_foreach(block, node) {
566                 unsigned available;
567
568                 if (! is_Phi(node))
569                         break;
570                 if (!arch_irn_consider_in_reg_alloc(cls, node))
571                         continue;
572
573                 if (all_preds_known) {
574                         available = available_in_all_preds(pred_worksets, arity, node, true);
575                 } else {
576                         available = AVAILABLE_UNKNOWN;
577                 }
578
579                 loc = to_take_or_not_to_take(first, node, loop, available);
580
581                 if (! USES_IS_INFINITE(loc.time)) {
582                         if (USES_IS_PENDING(loc.time))
583                                 ARR_APP1(loc_t, delayed, loc);
584                         else
585                                 ARR_APP1(loc_t, starters, loc);
586                 } else {
587                         be_spill_phi(senv, node);
588                 }
589         }
590
591         /* check all Live-Ins */
592         be_lv_foreach(lv, block, be_lv_state_in, i) {
593                 ir_node *node = be_lv_get_irn(lv, block, i);
594                 unsigned available;
595
596                 if (all_preds_known) {
597                         available = available_in_all_preds(pred_worksets, arity, node, false);
598                 } else {
599                         available = AVAILABLE_UNKNOWN;
600                 }
601
602                 loc = to_take_or_not_to_take(first, node, loop, available);
603
604                 if (! USES_IS_INFINITE(loc.time)) {
605                         if (USES_IS_PENDING(loc.time))
606                                 ARR_APP1(loc_t, delayed, loc);
607                         else
608                                 ARR_APP1(loc_t, starters, loc);
609                 }
610         }
611
612         pressure            = be_get_loop_pressure(loop_ana, cls, loop);
613         assert(ARR_LEN(delayed) <= (signed)pressure);
614         free_slots          = n_regs - ARR_LEN(starters);
615         free_pressure_slots = n_regs - (pressure - ARR_LEN(delayed));
616         free_slots          = MIN(free_slots, free_pressure_slots);
617
618         /* so far we only put nodes into the starters list that are used inside
619          * the loop. If register pressure in the loop is low then we can take some
620          * values and let them live through the loop */
621         DB((dbg, DBG_START, "Loop pressure %d, taking %d delayed vals\n",
622             pressure, free_slots));
623         if (free_slots > 0) {
624                 qsort(delayed, ARR_LEN(delayed), sizeof(delayed[0]), loc_compare);
625
626                 for (i = 0; i < ARR_LEN(delayed) && free_slots > 0; ++i) {
627                         int    p, arity;
628                         loc_t *loc = & delayed[i];
629
630                         if (!is_Phi(loc->node)) {
631                                 /* don't use values which are dead in a known predecessors
632                                  * to not induce unnecessary reloads */
633                                 arity = get_irn_arity(block);
634                                 for (p = 0; p < arity; ++p) {
635                                         ir_node      *pred_block = get_Block_cfgpred_block(block, p);
636                                         block_info_t *pred_info  = get_block_info(pred_block);
637
638                                         if (pred_info == NULL)
639                                                 continue;
640
641                                         if (!workset_contains(pred_info->end_workset, loc->node)) {
642                                                 DB((dbg, DBG_START,
643                                                         "    delayed %+F not live at pred %+F\n", loc->node,
644                                                         pred_block));
645                                                 goto skip_delayed;
646                                         }
647                                 }
648                         }
649
650                         DB((dbg, DBG_START, "    delayed %+F taken\n", loc->node));
651                         ARR_APP1(loc_t, starters, *loc);
652                         loc->node = NULL;
653                         --free_slots;
654                 skip_delayed:
655                         ;
656                 }
657         }
658
659         /* spill phis (the actual phis not just their values) that are in this block
660          * but not in the start workset */
661         for (i = ARR_LEN(delayed) - 1; i >= 0; --i) {
662                 ir_node *node = delayed[i].node;
663                 if (node == NULL || !is_Phi(node) || get_nodes_block(node) != block)
664                         continue;
665
666                 DB((dbg, DBG_START, "    spilling delayed phi %+F\n", node));
667                 be_spill_phi(senv, node);
668         }
669         DEL_ARR_F(delayed);
670
671         /* Sort start values by first use */
672         qsort(starters, ARR_LEN(starters), sizeof(starters[0]), loc_compare);
673
674         /* Copy the best ones from starters to start workset */
675         ws_count = MIN(ARR_LEN(starters), n_regs);
676         workset_clear(ws);
677         workset_bulk_fill(ws, ws_count, starters);
678
679         /* spill phis (the actual phis not just their values) that are in this block
680          * but not in the start workset */
681         len = ARR_LEN(starters);
682         for (i = ws_count; i < len; ++i) {
683                 ir_node *node = starters[i].node;
684                 if (! is_Phi(node) || get_nodes_block(node) != block)
685                         continue;
686
687                 DB((dbg, DBG_START, "    spilling phi %+F\n", node));
688                 be_spill_phi(senv, node);
689         }
690
691         DEL_ARR_F(starters);
692
693         /* determine spill status of the values: If there's 1 pred block (which
694          * is no backedge) where the value is spilled then we must set it to
695          * spilled here. */
696         for(i = 0; i < ws_count; ++i) {
697                 loc_t   *loc     = &ws->vals[i];
698                 ir_node *value   = loc->node;
699                 bool     spilled;
700                 int      n;
701
702                 /* phis from this block aren't spilled */
703                 if (get_nodes_block(value) == block) {
704                         assert(is_Phi(value));
705                         loc->spilled = false;
706                         continue;
707                 }
708
709                 /* determine if value was spilled on any predecessor */
710                 spilled = false;
711                 for(n = 0; n < arity; ++n) {
712                         workset_t *pred_workset = pred_worksets[n];
713                         int        p_len;
714                         int        p;
715
716                         if (pred_workset == NULL)
717                                 continue;
718
719                         p_len = workset_get_length(pred_workset);
720                         for(p = 0; p < p_len; ++p) {
721                                 loc_t *l = &pred_workset->vals[p];
722
723                                 if (l->node != value)
724                                         continue;
725
726                                 if (l->spilled) {
727                                         spilled = true;
728                                 }
729                                 break;
730                         }
731                 }
732
733                 loc->spilled = spilled;
734         }
735 }
736
737 /**
738  * For the given block @p block, decide for each values
739  * whether it is used from a register or is reloaded
740  * before the use.
741  */
742 static void process_block(ir_node *block)
743 {
744         workset_t       *new_vals;
745         ir_node         *irn;
746         int              iter;
747         block_info_t    *block_info;
748         int              arity;
749
750         /* no need to process a block twice */
751         assert(get_block_info(block) == NULL);
752
753         /* construct start workset */
754         arity = get_Block_n_cfgpreds(block);
755         if (arity == 0) {
756                 /* no predecessor -> empty set */
757                 workset_clear(ws);
758         } else if (arity == 1) {
759                 /* one predecessor, copy it's end workset */
760                 ir_node      *pred_block = get_Block_cfgpred_block(block, 0);
761                 block_info_t *pred_info  = get_block_info(pred_block);
762
763                 assert(pred_info != NULL);
764                 workset_copy(ws, pred_info->end_workset);
765         } else {
766                 /* multiple predecessors, do more advanced magic :) */
767                 decide_start_workset(block);
768         }
769
770         DB((dbg, DBG_DECIDE, "\n"));
771         DB((dbg, DBG_DECIDE, "Decide for %+F\n", block));
772
773         block_info = new_block_info();
774         set_block_info(block, block_info);
775
776         DB((dbg, DBG_WSETS, "Start workset for %+F:\n", block));
777         workset_foreach(ws, irn, iter) {
778                 DB((dbg, DBG_WSETS, "  %+F (%u)\n", irn,
779                      workset_get_time(ws, iter)));
780         }
781
782         block_info->start_workset = workset_clone(ws);
783
784         /* process the block from start to end */
785         DB((dbg, DBG_WSETS, "Processing...\n"));
786         instr_nr = 0;
787         /* TODO: this leaks (into the obstack)... */
788         new_vals = new_workset();
789
790         sched_foreach(block, irn) {
791                 int i, arity;
792                 assert(workset_get_length(ws) <= n_regs);
793
794                 /* Phis are no real instr (see insert_starters()) */
795                 if (is_Phi(irn)) {
796                         continue;
797                 }
798                 DB((dbg, DBG_DECIDE, "  ...%+F\n", irn));
799
800                 /* set instruction in the workset */
801                 instr = irn;
802
803                 /* allocate all values _used_ by this instruction */
804                 workset_clear(new_vals);
805                 for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
806                         ir_node *in = get_irn_n(irn, i);
807                         if (!arch_irn_consider_in_reg_alloc(cls, in))
808                                 continue;
809
810                         /* (note that "spilled" is irrelevant here) */
811                         workset_insert(new_vals, in, false);
812                 }
813                 displace(new_vals, 1);
814
815                 /* allocate all values _defined_ by this instruction */
816                 workset_clear(new_vals);
817                 if (get_irn_mode(irn) == mode_T) {
818                         const ir_edge_t *edge;
819
820                         foreach_out_edge(irn, edge) {
821                                 ir_node *proj = get_edge_src_irn(edge);
822                                 if (!arch_irn_consider_in_reg_alloc(cls, proj))
823                                         continue;
824                                 workset_insert(new_vals, proj, false);
825                         }
826                 } else {
827                         if (!arch_irn_consider_in_reg_alloc(cls, irn))
828                                 continue;
829                         workset_insert(new_vals, irn, false);
830                 }
831                 displace(new_vals, 0);
832
833                 instr_nr++;
834         }
835
836         /* Remember end-workset for this block */
837         block_info->end_workset = workset_clone(ws);
838         DB((dbg, DBG_WSETS, "End workset for %+F:\n", block));
839         workset_foreach(ws, irn, iter)
840                 DB((dbg, DBG_WSETS, "  %+F (%u)\n", irn,
841                      workset_get_time(ws, iter)));
842 }
843
844 /**
845  * 'decide' is block-local and makes assumptions
846  * about the set of live-ins. Thus we must adapt the
847  * live-outs to the live-ins at each block-border.
848  */
849 static void fix_block_borders(ir_node *block, void *data)
850 {
851         workset_t    *start_workset;
852         int           arity;
853         int           i;
854         int           iter;
855         (void) data;
856
857         DB((dbg, DBG_FIX, "\n"));
858         DB((dbg, DBG_FIX, "Fixing %+F\n", block));
859
860         arity = get_irn_arity(block);
861         /* can happen for endless loops */
862         if (arity == 0)
863                 return;
864
865         start_workset = get_block_info(block)->start_workset;
866
867         /* process all pred blocks */
868         for (i = 0; i < arity; ++i) {
869                 ir_node   *pred = get_Block_cfgpred_block(block, i);
870                 workset_t *pred_end_workset = get_block_info(pred)->end_workset;
871                 ir_node   *node;
872
873                 DB((dbg, DBG_FIX, "  Pred %+F\n", pred));
874
875                 /* spill all values not used anymore */
876                 workset_foreach(pred_end_workset, node, iter) {
877                         ir_node *n2;
878                         int      iter2;
879                         bool     found = false;
880                         workset_foreach(start_workset, n2, iter2) {
881                                 if (n2 == node) {
882                                         found = true;
883                                         break;
884                                 }
885                                 /* note that we do not look at phi inputs, becuase the values
886                                  * will be either live-end and need no spill or
887                                  * they have other users in which must be somewhere else in the
888                                  * workset */
889                         }
890
891                         if (found)
892                                 continue;
893
894                         if (move_spills && be_is_live_in(lv, block, node)
895                                         && !pred_end_workset->vals[iter].spilled) {
896                                 ir_node *insert_point;
897                                 if (arity > 1) {
898                                         insert_point = be_get_end_of_block_insertion_point(pred);
899                                         insert_point = sched_prev(insert_point);
900                                 } else {
901                                         insert_point = block;
902                                 }
903                                 DB((dbg, DBG_SPILL, "Spill %+F after %+F\n", node,
904                                      insert_point));
905                                 be_add_spill(senv, node, insert_point);
906                         }
907                 }
908
909                 /* reload missing values in predecessors, add missing spills */
910                 workset_foreach(start_workset, node, iter) {
911                         const loc_t *l    = &start_workset->vals[iter];
912                         const loc_t *pred_loc;
913
914                         /* if node is a phi of the current block we reload
915                          * the corresponding argument, else node itself */
916                         if (is_Phi(node) && get_nodes_block(node) == block) {
917                                 node = get_irn_n(node, i);
918                                 assert(!l->spilled);
919
920                                 /* we might have unknowns as argument for the phi */
921                                 if (!arch_irn_consider_in_reg_alloc(cls, node))
922                                         continue;
923                         }
924
925                         /* check if node is in a register at end of pred */
926                         pred_loc = workset_contains(pred_end_workset, node);
927                         if (pred_loc != NULL) {
928                                 /* we might have to spill value on this path */
929                                 if (move_spills && !pred_loc->spilled && l->spilled) {
930                                         ir_node *insert_point
931                                                 = be_get_end_of_block_insertion_point(pred);
932                                         insert_point = sched_prev(insert_point);
933                                         DB((dbg, DBG_SPILL, "Spill %+F after %+F\n", node,
934                                             insert_point));
935                                         be_add_spill(senv, node, insert_point);
936                                 }
937                         } else {
938                                 /* node is not in register at the end of pred -> reload it */
939                                 DB((dbg, DBG_FIX, "    reload %+F\n", node));
940                                 DB((dbg, DBG_SPILL, "Reload %+F before %+F,%d\n", node, block, i));
941                                 be_add_reload_on_edge(senv, node, block, i, cls, 1);
942                         }
943                 }
944         }
945 }
946
947 static void be_spill_belady(be_irg_t *birg, const arch_register_class_t *rcls)
948 {
949         int i;
950         ir_graph *irg = be_get_birg_irg(birg);
951
952         be_liveness_assure_sets(be_assure_liveness(birg));
953
954         stat_ev_tim_push();
955         /* construct control flow loop tree */
956         if (! (get_irg_loopinfo_state(irg) & loopinfo_cf_consistent)) {
957                 construct_cf_backedges(irg);
958         }
959         stat_ev_tim_pop("belady_time_backedges");
960
961         stat_ev_tim_push();
962         be_clear_links(irg);
963         stat_ev_tim_pop("belady_time_clear_links");
964
965         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
966
967         /* init belady env */
968         stat_ev_tim_push();
969         obstack_init(&obst);
970         cls       = rcls;
971         lv        = be_get_birg_liveness(birg);
972         n_regs    = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
973         ws        = new_workset();
974         uses      = be_begin_uses(irg, lv);
975         loop_ana  = be_new_loop_pressure(birg, cls);
976         senv      = be_new_spill_env(birg);
977         blocklist = be_get_cfgpostorder(irg);
978         stat_ev_tim_pop("belady_time_init");
979
980         stat_ev_tim_push();
981         /* walk blocks in reverse postorder */
982         for (i = ARR_LEN(blocklist) - 1; i >= 0; --i) {
983                 process_block(blocklist[i]);
984         }
985         DEL_ARR_F(blocklist);
986         stat_ev_tim_pop("belady_time_belady");
987
988         stat_ev_tim_push();
989         /* belady was block-local, fix the global flow by adding reloads on the
990          * edges */
991         irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);
992         stat_ev_tim_pop("belady_time_fix_borders");
993
994         ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
995
996         /* Insert spill/reload nodes into the graph and fix usages */
997         be_insert_spills_reloads(senv);
998
999         /* clean up */
1000         be_delete_spill_env(senv);
1001         be_end_uses(uses);
1002         be_free_loop_pressure(loop_ana);
1003         obstack_free(&obst, NULL);
1004 }
1005
1006 void be_init_spillbelady(void)
1007 {
1008         static be_spiller_t belady_spiller = {
1009                 be_spill_belady
1010         };
1011         lc_opt_entry_t *be_grp       = lc_opt_get_grp(firm_opt_get_root(), "be");
1012         lc_opt_entry_t *belady_group = lc_opt_get_grp(be_grp, "belady");
1013         lc_opt_add_table(belady_group, options);
1014
1015         be_register_spiller("belady", &belady_spiller);
1016         FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady");
1017 }
1018
1019 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady);