- split get_pairidx_for_regidx(), always called with constant parameter
[libfirm] / ir / be / bespillbelady.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Belady's spill algorithm.
23  * @author      Daniel Grund, Matthias Braun
24  * @date        20.09.2005
25  * @version     $Id$
26  */
27 #include "config.h"
28
29 #include <stdbool.h>
30
31 #include "obst.h"
32 #include "irprintf_t.h"
33 #include "irgraph.h"
34 #include "irnode.h"
35 #include "irmode.h"
36 #include "irgwalk.h"
37 #include "irloop.h"
38 #include "iredges_t.h"
39 #include "ircons_t.h"
40 #include "irprintf.h"
41 #include "irnodeset.h"
42
43 #include "beutil.h"
44 #include "bearch_t.h"
45 #include "beuses.h"
46 #include "besched_t.h"
47 #include "beirgmod.h"
48 #include "belive_t.h"
49 #include "benode_t.h"
50 #include "bechordal_t.h"
51 #include "bespilloptions.h"
52 #include "beloopana.h"
53 #include "beirg_t.h"
54 #include "bespill.h"
55 #include "bemodule.h"
56
/*
 * Debug masks. Each constant is a distinct bit and is passed as the mask
 * argument of the DB((dbg, MASK, ...)) calls in this file.
 */
#define DBG_SPILL     1   /* reload decisions taken in displace() */
#define DBG_WSETS     2   /* dumps of the start/end worksets of a block */
#define DBG_FIX       4   /* fixing of block borders */
#define DBG_DECIDE    8   /* per-instruction decisions (insert/dispose/spill) */
#define DBG_START    16   /* construction of the start workset */
#define DBG_SLOTS    32
#define DBG_TRACE    64
#define DBG_WORKSET 128
/* debug module handle used by all DB(()) calls; only exists in debug builds */
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

/* placeholder use time given to entries freshly added by workset_insert() */
#define TIME_UNDEFINED 6666
68
69 //#define LOOK_AT_LOOPDEPTH
70
/**
 * An association between a node and a point in time.
 * Entries of this type are stored inline in a workset_t and are ordered by
 * their @c time field when a workset is sorted.
 */
typedef struct loc_t {
        ir_node          *node;     /**< the value this entry describes */
        unsigned          time;     /**< A use time (see beuses.h). */
        bool              spilled;  /**< the value was already spilled on this path */
} loc_t;
79
80 typedef struct _workset_t {
81         int   len;          /**< current length */
82         loc_t vals[0];      /**< inlined array of the values/distances in this working set */
83 } workset_t;
84
/*
 * File-scope state of the spiller. The helpers below read these directly
 * instead of receiving an environment parameter.
 */
static struct obstack               obst;    /**< worksets and block infos are
                                                  allocated from here */
static const arch_register_class_t *cls;     /**< register class that values are
                                                  checked against */
static const be_lv_t               *lv;      /**< liveness info, used to enumerate
                                                  the live-ins of a block */
static be_loopana_t                *loop_ana; /**< queried for the register
                                                   pressure of a loop */
static int                          n_regs;  /**< capacity of every workset
                                                  (presumably the number of usable
                                                  registers of cls - confirm at the
                                                  place where it is set) */
static workset_t                   *ws;     /**< the main workset used while
                                                     processing a block. */
static be_uses_t                   *uses;   /**< env for the next-use magic */
static ir_node                     *instr;  /**< current instruction */
static unsigned                     instr_nr; /**< current instruction number
                                                       (relative to block start) */
static spill_env_t                 *senv;   /**< see bespill.h */
static ir_node                    **blocklist; /**< NOTE(review): not used in the
                                                    part of the file visible here */

/* tuning knobs, exposed through the options table below */
static bool                         move_spills      = true;  /**< displace() only
                                                                   requests spills when set */
static bool                         respectloopdepth = true;  /**< delay values that are
                                                                   only used outside the loop */
static bool                         improve_known_preds = true; /**< use availability in
                                                                     predecessors for the
                                                                     start workset */
/* factor to weight the different costs of reloading/rematerializing a node
   (see bespill.h be_get_reload_costs_no_weight) */
static int                          remat_bonus      = 10;
105
/**
 * Option table of this spiller: binds each option name and help text to the
 * static tuning variable it controls. The table is terminated by LC_OPT_LAST.
 */
static const lc_opt_table_entry_t options[] = {
        LC_OPT_ENT_BOOL   ("movespills", "try to move spills out of loops", &move_spills),
        LC_OPT_ENT_BOOL   ("respectloopdepth", "exprimental (outermost loop cutting)", &respectloopdepth),
        LC_OPT_ENT_BOOL   ("improveknownpreds", "experimental (known preds cutting)", &improve_known_preds),
        LC_OPT_ENT_INT    ("rematbonus", "give bonus to rematerialisable nodes", &remat_bonus),
        LC_OPT_LAST
};
113
114 static int loc_compare(const void *a, const void *b)
115 {
116         const loc_t *p = a;
117         const loc_t *q = b;
118         return p->time - q->time;
119 }
120
121 void workset_print(const workset_t *w)
122 {
123         int i;
124
125         for(i = 0; i < w->len; ++i) {
126                 ir_fprintf(stderr, "%+F %d\n", w->vals[i].node, w->vals[i].time);
127         }
128 }
129
130 /**
131  * Alloc a new workset on obstack @p ob with maximum size @p max
132  */
133 static workset_t *new_workset(void)
134 {
135         workset_t *res;
136         size_t     size = sizeof(*res) + n_regs * sizeof(res->vals[0]);
137
138         res  = obstack_alloc(&obst, size);
139         memset(res, 0, size);
140         return res;
141 }
142
143 /**
144  * Alloc a new instance on obstack and make it equal to @param workset
145  */
146 static workset_t *workset_clone(workset_t *workset)
147 {
148         workset_t *res;
149         size_t size = sizeof(*res) + n_regs * sizeof(res->vals[0]);
150         res = obstack_alloc(&obst, size);
151         memcpy(res, workset, size);
152         return res;
153 }
154
155 /**
156  * Copy workset @param src to @param tgt
157  */
158 static void workset_copy(workset_t *dest, const workset_t *src)
159 {
160         size_t size = sizeof(*src) + n_regs * sizeof(src->vals[0]);
161         memcpy(dest, src, size);
162 }
163
164 /**
165  * Overwrites the current content array of @param ws with the
166  * @param count locations given at memory @param locs.
167  * Set the length of @param ws to count.
168  */
169 static void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs)
170 {
171         workset->len = count;
172         memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
173 }
174
175 /**
176  * Inserts the value @p val into the workset, iff it is not
177  * already contained. The workset must not be full.
178  */
179 static void workset_insert(workset_t *workset, ir_node *val, bool spilled)
180 {
181         loc_t *loc;
182         int    i;
183         /* check for current regclass */
184         assert(arch_irn_consider_in_reg_alloc(cls, val));
185
186         /* check if val is already contained */
187         for (i = 0; i < workset->len; ++i) {
188                 loc = &workset->vals[i];
189                 if (loc->node == val) {
190                         if (spilled) {
191                                 loc->spilled = true;
192                         }
193                         return;
194                 }
195         }
196
197         /* insert val */
198         assert(workset->len < n_regs && "Workset already full!");
199         loc           = &workset->vals[workset->len];
200         loc->node     = val;
201         loc->spilled  = spilled;
202         loc->time     = TIME_UNDEFINED;
203         workset->len++;
204 }
205
206 /**
207  * Removes all entries from this workset
208  */
209 static void workset_clear(workset_t *workset)
210 {
211         workset->len = 0;
212 }
213
214 /**
215  * Removes the value @p val from the workset if present.
216  */
217 static INLINE void workset_remove(workset_t *workset, ir_node *val)
218 {
219         int i;
220         for(i = 0; i < workset->len; ++i) {
221                 if (workset->vals[i].node == val) {
222                         workset->vals[i] = workset->vals[--workset->len];
223                         return;
224                 }
225         }
226 }
227
228 static INLINE const loc_t *workset_contains(const workset_t *ws,
229                                             const ir_node *val)
230 {
231         int i;
232
233         for (i = 0; i < ws->len; ++i) {
234                 if (ws->vals[i].node == val)
235                         return &ws->vals[i];
236         }
237
238         return NULL;
239 }
240
/**
 * Iterates over all values in the working set.
 * @p ws The workset to iterate
 * @p v  A variable to put the current value in
 * @p i  An integer for internal use
 *
 * After the loop has run to completion @p v is NULL. All macro arguments are
 * parenthesised, so arbitrary expressions may be passed for @p ws.
 */
#define workset_foreach(ws, v, i) \
        for ((i) = 0; \
             (v) = ((i) < (ws)->len) ? (ws)->vals[(i)].node : NULL, (i) < (ws)->len; \
             ++(i))

/* Accessors for single workset entries and the workset length. Every macro
 * expands to one fully parenthesised expression. */
#define workset_set_time(ws, i, t)     ((ws)->vals[(i)].time = (t))
#define workset_get_time(ws, i)        ((ws)->vals[(i)].time)
#define workset_set_length(ws, length) ((ws)->len = (length))
#define workset_get_length(ws)         ((ws)->len)
#define workset_get_val(ws, i)         ((ws)->vals[(i)].node)
/* Sorts the used entries by ascending use time (see loc_compare). Expands to
 * an expression; the caller supplies the terminating semicolon. */
#define workset_sort(ws) \
        qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare)
257
/**
 * Per-block result of the spiller. It is attached to the block through its
 * link field (see get_block_info()/set_block_info()).
 */
typedef struct _block_info_t
{
        workset_t *start_workset;  /**< copy of the workset at the start of the block */
        workset_t *end_workset;    /**< copy of the workset after the last instruction */
} block_info_t;
263
264
265 static void *new_block_info(void)
266 {
267         block_info_t *res = obstack_alloc(&obst, sizeof(res[0]));
268         memset(res, 0, sizeof(res[0]));
269
270         return res;
271 }
272
/* The block info is kept in the link field of the block node.
 * get_block_info() yields NULL for a block whose link has not been set
 * (process_block() relies on this to detect unprocessed blocks). */
#define get_block_info(block)        ((block_info_t *)get_irn_link(block))
#define set_block_info(block, info)  set_irn_link(block, info)
275
276 /**
277  * @return The distance to the next use or 0 if irn has dont_spill flag set
278  */
279 static INLINE unsigned get_distance(ir_node *from, unsigned from_step,
280                                     const ir_node *def, int skip_from_uses)
281 {
282         be_next_use_t use;
283         int           flags = arch_irn_get_flags(def);
284         unsigned      costs;
285         unsigned      time;
286
287         assert(! (flags & arch_irn_flags_ignore));
288
289         use  = be_get_next_use(uses, from, from_step, def, skip_from_uses);
290         time = use.time;
291         if (USES_IS_INFINITE(time))
292                 return USES_INFINITY;
293
294         /* We have to keep nonspillable nodes in the workingset */
295         if (flags & arch_irn_flags_dont_spill)
296                 return 0;
297
298         /* give some bonus to rematerialisable nodes */
299         if (remat_bonus > 0) {
300                 costs = be_get_reload_costs_no_weight(senv, def, use.before);
301                 assert(costs * remat_bonus < 1000);
302                 time  += 1000 - (costs * remat_bonus);
303         }
304
305         return time;
306 }
307
308 /**
309  * Performs the actions necessary to grant the request that:
310  * - new_vals can be held in registers
311  * - as few as possible other values are disposed
312  * - the worst values get disposed
313  *
314  * @p is_usage indicates that the values in new_vals are used (not defined)
315  * In this case reloads must be performed
316  */
317 static void displace(workset_t *new_vals, int is_usage)
318 {
319         ir_node **to_insert = alloca(n_regs * sizeof(to_insert[0]));
320         bool     *spilled   = alloca(n_regs * sizeof(spilled[0]));
321         ir_node  *val;
322         int       i;
323         int       len;
324         int       spills_needed;
325         int       demand;
326         int       iter;
327
328         /* 1. Identify the number of needed slots and the values to reload */
329         demand = 0;
330         workset_foreach(new_vals, val, iter) {
331                 bool reloaded = false;
332
333                 if (! workset_contains(ws, val)) {
334                         DB((dbg, DBG_DECIDE, "    insert %+F\n", val));
335                         if (is_usage) {
336                                 DB((dbg, DBG_SPILL, "Reload %+F before %+F\n", val, instr));
337                                 be_add_reload(senv, val, instr, cls, 1);
338                                 reloaded = true;
339                         }
340                 } else {
341                         DB((dbg, DBG_DECIDE, "    %+F already in workset\n", val));
342                         assert(is_usage);
343                         /* remove the value from the current workset so it is not accidently
344                          * spilled */
345                         workset_remove(ws, val);
346                 }
347                 spilled[demand]   = reloaded;
348                 to_insert[demand] = val;
349                 ++demand;
350         }
351
352         /* 2. Make room for at least 'demand' slots */
353         len           = workset_get_length(ws);
354         spills_needed = len + demand - n_regs;
355         assert(spills_needed <= len);
356
357         /* Only make more free room if we do not have enough */
358         if (spills_needed > 0) {
359                 ir_node   *curr_bb  = NULL;
360                 workset_t *ws_start = NULL;
361
362                 if (move_spills) {
363                         curr_bb  = get_nodes_block(instr);
364                         ws_start = get_block_info(curr_bb)->start_workset;
365                 }
366
367                 DB((dbg, DBG_DECIDE, "    disposing %d values\n", spills_needed));
368
369                 /* calculate current next-use distance for live values */
370                 for (i = 0; i < len; ++i) {
371                         ir_node  *val  = workset_get_val(ws, i);
372                         unsigned  dist = get_distance(instr, instr_nr, val, !is_usage);
373                         workset_set_time(ws, i, dist);
374                 }
375
376                 /* sort entries by increasing nextuse-distance*/
377                 workset_sort(ws);
378
379                 for (i = len - spills_needed; i < len; ++i) {
380                         ir_node *val = ws->vals[i].node;
381
382                         DB((dbg, DBG_DECIDE, "    disposing node %+F (%u)\n", val,
383                              workset_get_time(ws, i)));
384
385                         if (move_spills) {
386                                 if (!USES_IS_INFINITE(ws->vals[i].time)
387                                                 && !ws->vals[i].spilled) {
388                                         ir_node *after_pos = sched_prev(instr);
389                                         DB((dbg, DBG_DECIDE, "Spill %+F after node %+F\n", val,
390                                                 after_pos));
391                                         be_add_spill(senv, val, after_pos);
392                                 }
393                         }
394                 }
395
396                 /* kill the last 'demand' entries in the array */
397                 workset_set_length(ws, len - spills_needed);
398         }
399
400         /* 3. Insert the new values into the workset */
401         for (i = 0; i < demand; ++i) {
402                 ir_node *val = to_insert[i];
403
404                 workset_insert(ws, val, spilled[i]);
405         }
406 }
407
/**
 * Result of checking whether a value is contained in the end worksets of
 * the predecessor blocks (see available_in_all_preds()).
 */
enum {
        AVAILABLE_EVERYWHERE,  /* contained in the end workset of every predecessor */
        AVAILABLE_NOWHERE,     /* contained in no predecessor end workset */
        AVAILABLE_PARTLY,      /* contained in some, but not all of them */
        AVAILABLE_UNKNOWN      /* used when not all predecessors are processed yet */
};
414
415 static unsigned available_in_all_preds(workset_t* const* pred_worksets,
416                                        size_t n_pred_worksets,
417                                        const ir_node *value, bool is_local_phi)
418 {
419         size_t i;
420         bool   avail_everywhere = true;
421         bool   avail_nowhere    = true;
422
423         assert(n_pred_worksets > 0);
424
425         /* value available in all preds? */
426         for (i = 0; i < n_pred_worksets; ++i) {
427                 bool             found     = false;
428                 const workset_t *p_workset = pred_worksets[i];
429                 int              p_len     = workset_get_length(p_workset);
430                 int              p_i;
431                 const ir_node   *l_value;
432
433                 if (is_local_phi) {
434                         assert(is_Phi(value));
435                         l_value = get_irn_n(value, i);
436                 } else {
437                         l_value = value;
438                 }
439
440                 for (p_i = 0; p_i < p_len; ++p_i) {
441                         const loc_t *p_l = &p_workset->vals[p_i];
442                         if (p_l->node != l_value)
443                                 continue;
444
445                         found = true;
446                         break;
447                 }
448
449                 if (found) {
450                         avail_nowhere = false;
451                 } else {
452                         avail_everywhere = false;
453                 }
454         }
455
456         if (avail_everywhere) {
457                 assert(!avail_nowhere);
458                 return AVAILABLE_EVERYWHERE;
459         } else if (avail_nowhere) {
460                 return AVAILABLE_NOWHERE;
461         } else {
462                 return AVAILABLE_PARTLY;
463         }
464 }
465
466 /** Decides whether a specific node should be in the start workset or not
467  *
468  * @param env      belady environment
469  * @param first
470  * @param node     the node to test
471  * @param loop     the loop of the node
472  */
473 static loc_t to_take_or_not_to_take(ir_node* first, ir_node *node,
474                                     ir_loop *loop, unsigned available)
475 {
476         be_next_use_t next_use;
477         loc_t         loc;
478
479         loc.time    = USES_INFINITY;
480         loc.node    = node;
481         loc.spilled = false;
482
483         if (!arch_irn_consider_in_reg_alloc(cls, node)) {
484                 loc.time = USES_INFINITY;
485                 return loc;
486         }
487
488         /* We have to keep nonspillable nodes in the workingset */
489         if (arch_irn_get_flags(node) & arch_irn_flags_dont_spill) {
490                 loc.time = 0;
491                 DB((dbg, DBG_START, "    %+F taken (dontspill node)\n", node, loc.time));
492                 return loc;
493         }
494
495         next_use = be_get_next_use(uses, first, 0, node, 0);
496         if (USES_IS_INFINITE(next_use.time)) {
497                 // the nodes marked as live in shouldn't be dead, so it must be a phi
498                 assert(is_Phi(node));
499                 loc.time = USES_INFINITY;
500                 DB((dbg, DBG_START, "    %+F not taken (dead)\n", node));
501                 return loc;
502         }
503
504         loc.time = next_use.time;
505
506         if (improve_known_preds) {
507                 if (available == AVAILABLE_EVERYWHERE) {
508                         DB((dbg, DBG_START, "    %+F taken (%u, live in all preds)\n",
509                             node, loc.time));
510                         return loc;
511                 } else if(available == AVAILABLE_NOWHERE) {
512                         DB((dbg, DBG_START, "    %+F not taken (%u, live in no pred)\n",
513                             node, loc.time));
514                         loc.time = USES_INFINITY;
515                         return loc;
516                 }
517         }
518
519         if (!respectloopdepth || next_use.outermost_loop >= get_loop_depth(loop)) {
520                 DB((dbg, DBG_START, "    %+F taken (%u, loop %d)\n", node, loc.time,
521                     next_use.outermost_loop));
522         } else {
523                 loc.time = USES_PENDING;
524                 DB((dbg, DBG_START, "    %+F delayed (outerdepth %d < loopdepth %d)\n",
525                     node, next_use.outermost_loop, get_loop_depth(loop)));
526         }
527
528         return loc;
529 }
530
/**
 * Computes the start-workset for a block with multiple predecessors. We assume
 * that at least 1 of the predeccesors is a back-edge which means we're at the
 * beginning of a loop. We try to reload as much values as possible now so they
 * don't get reloaded inside the loop.
 *
 * The result is written to the global workset ws. Phis of this block that do
 * not make it into the start workset are handed to be_spill_phi().
 *
 * @param block  the block whose start workset is computed
 */
static void decide_start_workset(const ir_node *block)
{
        ir_loop    *loop = get_irn_loop(block);
        ir_node    *first;
        ir_node    *node;
        loc_t       loc;
        loc_t      *starters;   /* values that are taken (if they fit) */
        loc_t      *delayed;    /* values only taken when slots are left over */
        int         i, len, ws_count;
        int             free_slots, free_pressure_slots;
        unsigned    pressure;
        int         arity;
        workset_t **pred_worksets;
        bool        all_preds_known;

        /* check predecessors: collect their end worksets; a predecessor
         * without block info has not been processed yet */
        arity           = get_irn_arity(block);
        pred_worksets   = alloca(sizeof(pred_worksets[0]) * arity);
        all_preds_known = true;
        for(i = 0; i < arity; ++i) {
                ir_node      *pred_block = get_Block_cfgpred_block(block, i);
                block_info_t *pred_info  = get_block_info(pred_block);

                if (pred_info == NULL) {
                        pred_worksets[i] = NULL;
                        all_preds_known  = false;
                } else {
                        pred_worksets[i] = pred_info->end_workset;
                }
        }

        /* Collect all values living at start of block */
        starters = NEW_ARR_F(loc_t, 0);
        delayed  = NEW_ARR_F(loc_t, 0);

        DB((dbg, DBG_START, "Living at start of %+F:\n", block));
        first = sched_first(block);

        /* check all Phis first (they are at the beginning of the schedule) */
        sched_foreach(block, node) {
                unsigned available;

                if (! is_Phi(node))
                        break;
                if (!arch_irn_consider_in_reg_alloc(cls, node))
                        continue;

                /* availability can only be judged if all preds are processed */
                if (all_preds_known) {
                        available = available_in_all_preds(pred_worksets, arity, node, true);
                } else {
                        available = AVAILABLE_UNKNOWN;
                }

                loc = to_take_or_not_to_take(first, node, loop, available);

                if (! USES_IS_INFINITE(loc.time)) {
                        if (USES_IS_PENDING(loc.time))
                                ARR_APP1(loc_t, delayed, loc);
                        else
                                ARR_APP1(loc_t, starters, loc);
                } else {
                        /* a Phi that is not taken is spilled as a whole */
                        be_spill_phi(senv, node);
                }
        }

        /* check all Live-Ins */
        be_lv_foreach(lv, block, be_lv_state_in, i) {
                ir_node *node = be_lv_get_irn(lv, block, i);
                unsigned available;

                if (all_preds_known) {
                        available = available_in_all_preds(pred_worksets, arity, node, false);
                } else {
                        available = AVAILABLE_UNKNOWN;
                }

                loc = to_take_or_not_to_take(first, node, loop, available);

                if (! USES_IS_INFINITE(loc.time)) {
                        if (USES_IS_PENDING(loc.time))
                                ARR_APP1(loc_t, delayed, loc);
                        else
                                ARR_APP1(loc_t, starters, loc);
                }
        }

        /* the number of delayed values we may take is limited by the free
         * slots in the workset and by the register pressure of the loop */
        pressure            = be_get_loop_pressure(loop_ana, cls, loop);
        assert(ARR_LEN(delayed) <= (signed)pressure);
        free_slots          = n_regs - ARR_LEN(starters);
        free_pressure_slots = n_regs - (pressure - ARR_LEN(delayed));
        free_slots          = MIN(free_slots, free_pressure_slots);

        /* so far we only put nodes into the starters list that are used inside
         * the loop. If register pressure in the loop is low then we can take some
         * values and let them live through the loop */
        DB((dbg, DBG_START, "Loop pressure %d, taking %d delayed vals\n",
            pressure, free_slots));
        if (free_slots > 0) {
                qsort(delayed, ARR_LEN(delayed), sizeof(delayed[0]), loc_compare);

                for (i = 0; i < ARR_LEN(delayed) && free_slots > 0; ++i) {
                        int    p, arity;
                        loc_t *loc = & delayed[i];

                        if (!is_Phi(loc->node)) {
                                /* don't use values which are dead in a known predecessors
                                 * to not induce unnecessary reloads */
                                arity = get_irn_arity(block);
                                for (p = 0; p < arity; ++p) {
                                        ir_node      *pred_block = get_Block_cfgpred_block(block, p);
                                        block_info_t *pred_info  = get_block_info(pred_block);

                                        /* unprocessed predecessor: nothing known */
                                        if (pred_info == NULL)
                                                continue;

                                        if (!workset_contains(pred_info->end_workset, loc->node)) {
                                                DB((dbg, DBG_START,
                                                        "    delayed %+F not live at pred %+F\n", loc->node,
                                                        pred_block));
                                                goto skip_delayed;
                                        }
                                }
                        }

                        DB((dbg, DBG_START, "    delayed %+F taken\n", loc->node));
                        ARR_APP1(loc_t, starters, *loc);
                        /* mark the entry as consumed for the phi spilling below */
                        loc->node = NULL;
                        --free_slots;
                skip_delayed:
                        ;
                }
        }

        /* spill phis (the actual phis not just their values) that are in this block
         * but not in the start workset; here: delayed phis that were not taken */
        for (i = ARR_LEN(delayed) - 1; i >= 0; --i) {
                ir_node *node = delayed[i].node;
                if (node == NULL || !is_Phi(node) || get_nodes_block(node) != block)
                        continue;

                DB((dbg, DBG_START, "    spilling delayed phi %+F\n", node));
                be_spill_phi(senv, node);
        }
        DEL_ARR_F(delayed);

        /* Sort start values by first use */
        qsort(starters, ARR_LEN(starters), sizeof(starters[0]), loc_compare);

        /* Copy the best ones from starters to start workset */
        ws_count = MIN(ARR_LEN(starters), n_regs);
        workset_clear(ws);
        workset_bulk_fill(ws, ws_count, starters);

        /* spill phis (the actual phis not just their values) that are in this block
         * but not in the start workset; here: starters that did not fit */
        len = ARR_LEN(starters);
        for (i = ws_count; i < len; ++i) {
                ir_node *node = starters[i].node;
                if (! is_Phi(node) || get_nodes_block(node) != block)
                        continue;

                DB((dbg, DBG_START, "    spilling phi %+F\n", node));
                be_spill_phi(senv, node);
        }

        DEL_ARR_F(starters);

        /* determine spill status of the values: If there's 1 pred block (which
         * is no backedge) where the value is spilled then we must set it to
         * spilled here. Predecessors without a known end workset are skipped. */
        for(i = 0; i < ws_count; ++i) {
                loc_t   *loc     = &ws->vals[i];
                ir_node *value   = loc->node;
                bool     spilled;
                int      n;

                /* phis from this block aren't spilled */
                if (get_nodes_block(value) == block) {
                        assert(is_Phi(value));
                        loc->spilled = false;
                        continue;
                }

                /* determine if value was spilled on any predecessor */
                spilled = false;
                for(n = 0; n < arity; ++n) {
                        workset_t *pred_workset = pred_worksets[n];
                        int        p_len;
                        int        p;

                        if (pred_workset == NULL)
                                continue;

                        p_len = workset_get_length(pred_workset);
                        for(p = 0; p < p_len; ++p) {
                                loc_t *l = &pred_workset->vals[p];

                                if (l->node != value)
                                        continue;

                                if (l->spilled) {
                                        spilled = true;
                                }
                                /* entry found, continue with the next predecessor */
                                break;
                        }
                }

                loc->spilled = spilled;
        }
}
747
/**
 * For the given block @p block, decide for each values
 * whether it is used from a register or is reloaded
 * before the use.
 *
 * Builds the start workset of the block in the global workset ws, walks the
 * schedule of the block while calling displace() for the used and the
 * defined values of every instruction, and stores copies of the start and
 * end workset in a new block info attached to the block.
 *
 * @param block  the block to process; it must not have a block info yet
 */
static void process_block(ir_node *block)
{
        workset_t       *new_vals;
        ir_node         *irn;
        int              iter;
        block_info_t    *block_info;
        int              arity;

        /* no need to process a block twice */
        assert(get_block_info(block) == NULL);

        /* construct start workset */
        arity = get_Block_n_cfgpreds(block);
        if (arity == 0) {
                /* no predecessor -> empty set */
                workset_clear(ws);
        } else if (arity == 1) {
                /* one predecessor, copy it's end workset; the predecessor must
                 * have been processed already */
                ir_node      *pred_block = get_Block_cfgpred_block(block, 0);
                block_info_t *pred_info  = get_block_info(pred_block);

                assert(pred_info != NULL);
                workset_copy(ws, pred_info->end_workset);
        } else {
                /* multiple predecessors, do more advanced magic :) */
                decide_start_workset(block);
        }

        DB((dbg, DBG_DECIDE, "\n"));
        DB((dbg, DBG_DECIDE, "Decide for %+F\n", block));

        block_info = new_block_info();
        set_block_info(block, block_info);

        DB((dbg, DBG_WSETS, "Start workset for %+F:\n", block));
        workset_foreach(ws, irn, iter) {
                DB((dbg, DBG_WSETS, "  %+F (%u)\n", irn,
                     workset_get_time(ws, iter)));
        }

        /* remember the start workset; ws itself is modified below */
        block_info->start_workset = workset_clone(ws);

        /* process the block from start to end */
        DB((dbg, DBG_WSETS, "Processing...\n"));
        instr_nr = 0;
        /* TODO: this leaks (into the obstack)... */
        new_vals = new_workset();

        sched_foreach(block, irn) {
                int i, arity;
                assert(workset_get_length(ws) <= n_regs);

                /* Phis are no real instructions and are skipped here */
                if (is_Phi(irn)) {
                        continue;
                }
                DB((dbg, DBG_DECIDE, "  ...%+F\n", irn));

                /* set instruction in the workset */
                instr = irn;

                /* allocate all values _used_ by this instruction */
                workset_clear(new_vals);
                for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
                        ir_node *in = get_irn_n(irn, i);
                        if (!arch_irn_consider_in_reg_alloc(cls, in))
                                continue;

                        /* (note that "spilled" is irrelevant here) */
                        workset_insert(new_vals, in, false);
                }
                displace(new_vals, 1);

                /* allocate all values _defined_ by this instruction */
                workset_clear(new_vals);
                if (get_irn_mode(irn) == mode_T) {
                        /* the results of a tuple node are its Projs */
                        const ir_edge_t *edge;

                        foreach_out_edge(irn, edge) {
                                ir_node *proj = get_edge_src_irn(edge);
                                if (!arch_irn_consider_in_reg_alloc(cls, proj))
                                        continue;
                                workset_insert(new_vals, proj, false);
                        }
                } else {
                        /* NOTE(review): this continue also skips the instr_nr++
                         * at the end of the loop body, so such instructions are
                         * not counted - confirm that this is intended */
                        if (!arch_irn_consider_in_reg_alloc(cls, irn))
                                continue;
                        workset_insert(new_vals, irn, false);
                }
                displace(new_vals, 0);

                instr_nr++;
        }

        /* Remember end-workset for this block */
        block_info->end_workset = workset_clone(ws);
        DB((dbg, DBG_WSETS, "End workset for %+F:\n", block));
        workset_foreach(ws, irn, iter)
                DB((dbg, DBG_WSETS, "  %+F (%u)\n", irn,
                     workset_get_time(ws, iter)));
}
854
855 /**
856  * 'decide' is block-local and makes assumptions
857  * about the set of live-ins. Thus we must adapt the
858  * live-outs to the live-ins at each block-border.
859  */
860 static void fix_block_borders(ir_node *block, void *data)
861 {
862         workset_t    *start_workset;
863         int           arity;
864         int           i;
865         int           iter;
866         (void) data;
867
868         DB((dbg, DBG_FIX, "\n"));
869         DB((dbg, DBG_FIX, "Fixing %+F\n", block));
870
871         arity = get_irn_arity(block);
872         /* can happen for endless loops */
873         if (arity == 0)
874                 return;
875
876         start_workset = get_block_info(block)->start_workset;
877
878         /* process all pred blocks */
879         for (i = 0; i < arity; ++i) {
880                 ir_node   *pred = get_Block_cfgpred_block(block, i);
881                 workset_t *pred_end_workset = get_block_info(pred)->end_workset;
882                 ir_node   *node;
883
884                 DB((dbg, DBG_FIX, "  Pred %+F\n", pred));
885
886                 /* spill all values not used anymore */
887                 workset_foreach(pred_end_workset, node, iter) {
888                         ir_node *n2;
889                         int      iter2;
890                         bool     found = false;
891                         workset_foreach(start_workset, n2, iter2) {
892                                 if (n2 == node) {
893                                         found = true;
894                                         break;
895                                 }
896                                 /* note that we do not look at phi inputs, becuase the values
897                                  * will be either live-end and need no spill or
898                                  * they have other users in which must be somewhere else in the
899                                  * workset */
900                         }
901
902                         if (found)
903                                 continue;
904
905                         if (move_spills && be_is_live_in(lv, block, node)
906                                         && !pred_end_workset->vals[iter].spilled) {
907                                 ir_node *insert_point;
908                                 if (arity > 1) {
909                                         insert_point = be_get_end_of_block_insertion_point(pred);
910                                         insert_point = sched_prev(insert_point);
911                                 } else {
912                                         insert_point = block;
913                                 }
914                                 DB((dbg, DBG_SPILL, "Spill %+F after %+F\n", node,
915                                      insert_point));
916                                 be_add_spill(senv, node, insert_point);
917                         }
918                 }
919
920                 /* reload missing values in predecessors, add missing spills */
921                 workset_foreach(start_workset, node, iter) {
922                         const loc_t *l    = &start_workset->vals[iter];
923                         const loc_t *pred_loc;
924
925                         /* if node is a phi of the current block we reload
926                          * the corresponding argument, else node itself */
927                         if (is_Phi(node) && get_nodes_block(node) == block) {
928                                 node = get_irn_n(node, i);
929                                 assert(!l->spilled);
930
931                                 /* we might have unknowns as argument for the phi */
932                                 if (!arch_irn_consider_in_reg_alloc(cls, node))
933                                         continue;
934                         }
935
936                         /* check if node is in a register at end of pred */
937                         pred_loc = workset_contains(pred_end_workset, node);
938                         if (pred_loc != NULL) {
939                                 /* we might have to spill value on this path */
940                                 if (move_spills && !pred_loc->spilled && l->spilled) {
941                                         ir_node *insert_point
942                                                 = be_get_end_of_block_insertion_point(pred);
943                                         insert_point = sched_prev(insert_point);
944                                         DB((dbg, DBG_SPILL, "Spill %+F after %+F\n", node,
945                                             insert_point));
946                                         be_add_spill(senv, node, insert_point);
947                                 }
948                         } else {
949                                 /* node is not in register at the end of pred -> reload it */
950                                 DB((dbg, DBG_FIX, "    reload %+F\n", node));
951                                 DB((dbg, DBG_SPILL, "Reload %+F before %+F,%d\n", node, block, i));
952                                 be_add_reload_on_edge(senv, node, block, i, cls, 1);
953                         }
954                 }
955         }
956 }
957
958 static void add_block(ir_node *block, void *data)
959 {
960         (void) data;
961         ARR_APP1(ir_node*, blocklist, block);
962 }
963
964 static void be_spill_belady(be_irg_t *birg, const arch_register_class_t *rcls)
965 {
966         int i;
967         ir_graph *irg = be_get_birg_irg(birg);
968
969         be_liveness_assure_sets(be_assure_liveness(birg));
970
971         stat_ev_tim_push();
972         /* construct control flow loop tree */
973         if (! (get_irg_loopinfo_state(irg) & loopinfo_cf_consistent)) {
974                 construct_cf_backedges(irg);
975         }
976         stat_ev_tim_pop("belady_time_backedges");
977
978         stat_ev_tim_push();
979         be_clear_links(irg);
980         stat_ev_tim_pop("belady_time_clear_links");
981
982         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
983
984         /* init belady env */
985         stat_ev_tim_push();
986         obstack_init(&obst);
987         cls       = rcls;
988         lv        = be_get_birg_liveness(birg);
989         n_regs    = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
990         ws        = new_workset();
991         uses      = be_begin_uses(irg, lv);
992         loop_ana  = be_new_loop_pressure(birg, cls);
993         senv      = be_new_spill_env(birg);
994         blocklist = NEW_ARR_F(ir_node*, 0);
995         irg_block_edges_walk(get_irg_start_block(irg), NULL, add_block, NULL);
996         stat_ev_tim_pop("belady_time_init");
997
998         stat_ev_tim_push();
999         /* walk blocks in reverse postorder */
1000         for (i = ARR_LEN(blocklist) - 1; i >= 0; --i) {
1001                 process_block(blocklist[i]);
1002         }
1003         DEL_ARR_F(blocklist);
1004         stat_ev_tim_pop("belady_time_belady");
1005
1006         stat_ev_tim_push();
1007         /* belady was block-local, fix the global flow by adding reloads on the
1008          * edges */
1009         irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);
1010         stat_ev_tim_pop("belady_time_fix_borders");
1011
1012         ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1013
1014         /* Insert spill/reload nodes into the graph and fix usages */
1015         be_insert_spills_reloads(senv);
1016
1017         /* clean up */
1018         be_delete_spill_env(senv);
1019         be_end_uses(uses);
1020         be_free_loop_pressure(loop_ana);
1021         obstack_free(&obst, NULL);
1022 }
1023
1024 void be_init_spillbelady(void)
1025 {
1026         static be_spiller_t belady_spiller = {
1027                 be_spill_belady
1028         };
1029         lc_opt_entry_t *be_grp       = lc_opt_get_grp(firm_opt_get_root(), "be");
1030         lc_opt_entry_t *belady_group = lc_opt_get_grp(be_grp, "belady");
1031         lc_opt_add_table(belady_group, options);
1032
1033         be_register_spiller("belady", &belady_spiller);
1034         FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady");
1035 }
1036
1037 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady);