be: rework op_attr handling
[libfirm] / ir / be / bespillbelady.c
1 /*
2  * Copyright (C) 1995-2011 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Belady's spill algorithm.
23  * @author      Daniel Grund, Matthias Braun
24  * @date        20.09.2005
25  */
26 #include "config.h"
27
28 #include <stdbool.h>
29
30 #include "obst.h"
31 #include "irprintf_t.h"
32 #include "irgraph.h"
33 #include "irnode.h"
34 #include "irmode.h"
35 #include "irgwalk.h"
36 #include "irloop.h"
37 #include "iredges_t.h"
38 #include "ircons_t.h"
39 #include "irprintf.h"
40 #include "irnodeset.h"
41 #include "irtools.h"
42 #include "util.h"
43
44 #include "beutil.h"
45 #include "bearch.h"
46 #include "beuses.h"
47 #include "besched.h"
48 #include "beirgmod.h"
49 #include "belive_t.h"
50 #include "benode.h"
51 #include "bechordal_t.h"
52 #include "bespill.h"
53 #include "beloopana.h"
54 #include "beirg.h"
55 #include "bespillutil.h"
56 #include "bemodule.h"
57
/*
 * Bit masks selecting the category of a debug message; they are passed as the
 * second argument of DB((dbg, <mask>, ...)) throughout this file.
 */
#define DBG_SPILL     1   /* reload decisions ("Reload ... before ...") */
#define DBG_WSETS     2   /* dumps of start/end worksets */
#define DBG_FIX       4   /* fixing of block borders */
#define DBG_DECIDE    8   /* per-instruction insert/dispose decisions */
#define DBG_START    16   /* construction of a block's start workset */
#define DBG_SLOTS    32   /* NOTE(review): no use visible in this part of the file */
#define DBG_TRACE    64   /* NOTE(review): no use visible in this part of the file */
#define DBG_WORKSET 128   /* NOTE(review): no use visible in this part of the file */
/* debug module handle, only present in debug builds */
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

/* Placeholder stored in loc_t::time by workset_insert() until a real
 * next-use distance has been computed for the entry. */
#define TIME_UNDEFINED 6666
69
/**
 * An association between a node and a point in time.
 * This is the element type of a workset (see workset_t).
 */
typedef struct loc_t {
        ir_node          *node;     /**< The value this entry stands for. */
        unsigned          time;     /**< A use time (see beuses.h); this is the
                                         key loc_compare() sorts by. */
        bool              spilled;  /**< value was already spilled on this path */
} loc_t;
78
79 typedef struct workset_t {
80         unsigned len;     /**< current length */
81         loc_t    vals[0]; /**< array of the values/distances in this working set */
82 } workset_t;
83
/* File-local state shared by all functions of the spiller. */
static struct obstack               obst;    /**< worksets and block infos are
                                                  allocated here */
static const arch_register_class_t *cls;     /**< register class used to filter
                                                  the values to consider */
static const be_lv_t               *lv;      /**< liveness info; queried for the
                                                  live-ins of a block */
static be_loopana_t                *loop_ana;/**< source of the loop pressure
                                                  (be_get_loop_pressure) */
static unsigned                     n_regs;  /**< capacity of every workset */
static workset_t                   *ws;      /**< the main workset used while
                                                  processing a block. */
static be_uses_t                   *uses;    /**< env for the next-use magic */
static ir_node                     *instr;   /**< current instruction */
static spill_env_t                 *senv;    /**< see bespill.h */
static ir_node                    **blocklist; /* NOTE(review): not used in the
                                                  part of the file visible here */
95
/* Tunables; each one is exposed through the options[] table below. */

/* when set, displace() places a spill right before the evicting instruction */
static int                          move_spills      = true;
/* when set, values whose next use lies outside the current loop nest are
 * delayed instead of being taken into the start workset directly */
static int                          respectloopdepth = true;
/* when set, availability in the predecessors' end worksets is taken into
 * account for the start workset */
static int                          improve_known_preds = true;
/* factor to weight the different costs of reloading/rematerializing a node
   (see bespill.h be_get_reload_costs_no_weight) */
static int                          remat_bonus      = 10;

/* Option table: maps option names to the variables above. */
static const lc_opt_table_entry_t options[] = {
        LC_OPT_ENT_BOOL   ("movespills", "try to move spills out of loops", &move_spills),
        LC_OPT_ENT_BOOL   ("respectloopdepth", "outermost loop cutting", &respectloopdepth),
        LC_OPT_ENT_BOOL   ("improveknownpreds", "known preds cutting", &improve_known_preds),
        LC_OPT_ENT_INT    ("rematbonus", "give bonus to rematerialisable nodes", &remat_bonus),
        LC_OPT_LAST
};
110
111 /**
112  * Alloc a new workset on obstack @p ob with maximum size @p max
113  */
114 static workset_t *new_workset(void)
115 {
116         return OALLOCFZ(&obst, workset_t, vals, n_regs);
117 }
118
119 /**
120  * Alloc a new instance on obstack and make it equal to @param workset
121  */
122 static workset_t *workset_clone(workset_t *workset)
123 {
124         workset_t *res = OALLOCF(&obst, workset_t, vals, n_regs);
125         memcpy(res, workset, sizeof(*res) + n_regs * sizeof(res->vals[0]));
126         return res;
127 }
128
129 /**
130  * Copy workset @param src to @param tgt
131  */
132 static void workset_copy(workset_t *dest, const workset_t *src)
133 {
134         size_t size = sizeof(*src) + n_regs * sizeof(src->vals[0]);
135         memcpy(dest, src, size);
136 }
137
138 /**
139  * Overwrites the current content array of @param ws with the
140  * @param count locations given at memory @param locs.
141  * Set the length of @param ws to count.
142  */
143 static void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs)
144 {
145         workset->len = count;
146         memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
147 }
148
149 /**
150  * Inserts the value @p val into the workset, iff it is not
151  * already contained. The workset must not be full.
152  */
153 static void workset_insert(workset_t *workset, ir_node *val, bool spilled)
154 {
155         loc_t    *loc;
156         unsigned  i;
157         /* check for current regclass */
158         assert(arch_irn_consider_in_reg_alloc(cls, val));
159
160         /* check if val is already contained */
161         for (i = 0; i < workset->len; ++i) {
162                 loc = &workset->vals[i];
163                 if (loc->node == val) {
164                         if (spilled) {
165                                 loc->spilled = true;
166                         }
167                         return;
168                 }
169         }
170
171         /* insert val */
172         assert(workset->len < n_regs && "Workset already full!");
173         loc           = &workset->vals[workset->len];
174         loc->node     = val;
175         loc->spilled  = spilled;
176         loc->time     = TIME_UNDEFINED;
177         workset->len++;
178 }
179
180 /**
181  * Removes all entries from this workset
182  */
183 static void workset_clear(workset_t *workset)
184 {
185         workset->len = 0;
186 }
187
188 /**
189  * Removes the value @p val from the workset if present.
190  */
191 static void workset_remove(workset_t *workset, ir_node *val)
192 {
193         unsigned i;
194         for (i = 0; i < workset->len; ++i) {
195                 if (workset->vals[i].node == val) {
196                         workset->vals[i] = workset->vals[--workset->len];
197                         return;
198                 }
199         }
200 }
201
202 static const loc_t *workset_contains(const workset_t *ws, const ir_node *val)
203 {
204         unsigned i;
205         for (i = 0; i < ws->len; ++i) {
206                 if (ws->vals[i].node == val)
207                         return &ws->vals[i];
208         }
209
210         return NULL;
211 }
212
213 static int loc_compare(const void *a, const void *b)
214 {
215         const loc_t *p = (const loc_t*)a;
216         const loc_t *q = (const loc_t*)b;
217         return p->time - q->time;
218 }
219
220 static void workset_sort(workset_t *workset)
221 {
222         qsort(workset->vals, workset->len, sizeof(workset->vals[0]), loc_compare);
223 }
224
225 static inline unsigned workset_get_time(const workset_t *workset, unsigned idx)
226 {
227         return workset->vals[idx].time;
228 }
229
230 static inline void workset_set_time(workset_t *workset, unsigned idx,
231                                     unsigned time)
232 {
233         workset->vals[idx].time = time;
234 }
235
236 static inline unsigned workset_get_length(const workset_t *workset)
237 {
238         return workset->len;
239 }
240
241 static inline void workset_set_length(workset_t *workset, unsigned len)
242 {
243         workset->len = len;
244 }
245
246 static inline ir_node *workset_get_val(const workset_t *workset, unsigned idx)
247 {
248         return workset->vals[idx].node;
249 }
250
/**
 * Iterates over all values in the working set.
 * All macro arguments are parenthesized in the expansion, so arbitrary
 * expressions may be passed for @p ws.
 *
 * @p ws The workset to iterate (evaluated several times per iteration)
 * @p v  A variable to put the current value in; it is NULL once the loop
 *       has run to completion
 * @p i  An integer lvalue for internal use (the index of the current entry)
 */
#define workset_foreach(ws, v, i) \
        for ((i) = 0; \
             (v) = ((i) < (ws)->len) ? (ws)->vals[(i)].node : NULL, \
             (i) < (ws)->len; \
             ++(i))
259
/**
 * Per-block result of the spiller; attached to a block through its link
 * field (see get_block_info() / set_block_info()).
 */
typedef struct block_info_t {
        workset_t *start_workset; /**< copy of the workset at block entry */
        workset_t *end_workset;   /**< copy of the workset at block exit */
} block_info_t;
264
265 static block_info_t *new_block_info(void)
266 {
267         return OALLOCZ(&obst, block_info_t);
268 }
269
270 static inline block_info_t *get_block_info(const ir_node *block)
271 {
272         return (block_info_t*)get_irn_link(block);
273 }
274
275 static inline void set_block_info(ir_node *block, block_info_t *info)
276 {
277         set_irn_link(block, info);
278 }
279
280 /**
281  * @return The distance to the next use or 0 if irn has dont_spill flag set
282  */
283 static unsigned get_distance(ir_node *from, const ir_node *def, int skip_from_uses)
284 {
285         be_next_use_t use;
286         unsigned      costs;
287         unsigned      time;
288
289         assert(!arch_irn_is_ignore(def));
290
291         use  = be_get_next_use(uses, from, def, skip_from_uses);
292         time = use.time;
293         if (USES_IS_INFINITE(time))
294                 return USES_INFINITY;
295
296         /* We have to keep nonspillable nodes in the workingset */
297         if (arch_get_irn_flags(skip_Proj_const(def)) & arch_irn_flags_dont_spill)
298                 return 0;
299
300         /* give some bonus to rematerialisable nodes */
301         if (remat_bonus > 0) {
302                 costs = be_get_reload_costs_no_weight(senv, def, use.before);
303                 assert(costs * remat_bonus < 1000);
304                 time  += 1000 - (costs * remat_bonus);
305         }
306
307         return time;
308 }
309
/**
 * Performs the actions necessary to grant the request that:
 * - new_vals can be held in registers
 * - as few as possible other values are disposed
 * - the worst values get disposed
 *
 * Works on the global workset ws and the global current instruction instr.
 *
 * @p new_vals the values that have to be in the workset afterwards
 * @p is_usage indicates that the values in new_vals are used (not defined)
 * In this case reloads must be performed
 */
static void displace(workset_t *new_vals, int is_usage)
{
        /* scratch arrays, one slot per requested value: the value itself and
         * the spilled flag it gets when it is (re)inserted in step 3 */
        ir_node **to_insert = ALLOCAN(ir_node*, n_regs);
        bool     *spilled   = ALLOCAN(bool,     n_regs);
        ir_node  *val;
        int       i;
        int       len;
        int       spills_needed;
        int       demand;
        unsigned  iter;

        /* 1. Identify the number of needed slots and the values to reload */
        demand = 0;
        workset_foreach(new_vals, val, iter) {
                bool reloaded = false;

                if (! workset_contains(ws, val)) {
                        DB((dbg, DBG_DECIDE, "    insert %+F\n", val));
                        if (is_usage) {
                                /* used but not in the workset: request a reload */
                                DB((dbg, DBG_SPILL, "Reload %+F before %+F\n", val, instr));
                                be_add_reload(senv, val, instr, cls, 1);
                                reloaded = true;
                        }
                } else {
                        DB((dbg, DBG_DECIDE, "    %+F already in workset\n", val));
                        /* only a used value is expected to be present already */
                        assert(is_usage);
                        /* remove the value from the current workset so it is not
                         * accidentally spilled; it is re-inserted in step 3 */
                        workset_remove(ws, val);
                }
                /* a reloaded value enters the workset flagged as spilled */
                spilled[demand]   = reloaded;
                to_insert[demand] = val;
                ++demand;
        }

        /* 2. Make room for at least 'demand' slots */
        len           = workset_get_length(ws);
        spills_needed = len + demand - n_regs;
        assert(spills_needed <= len);

        /* Only make more free room if we do not have enough */
        if (spills_needed > 0) {
                DB((dbg, DBG_DECIDE, "    disposing %d values\n", spills_needed));

                /* calculate current next-use distance for live values */
                for (i = 0; i < len; ++i) {
                        ir_node  *val  = workset_get_val(ws, i);
                        unsigned  dist = get_distance(instr, val, !is_usage);
                        workset_set_time(ws, i, dist);
                }

                /* sort entries by increasing nextuse-distance*/
                workset_sort(ws);

                /* the last 'spills_needed' entries are the ones with the largest
                 * distance; these get evicted */
                for (i = len - spills_needed; i < len; ++i) {
                        ir_node *val = ws->vals[i].node;

                        DB((dbg, DBG_DECIDE, "    disposing node %+F (%u)\n", val,
                             workset_get_time(ws, i)));

                        if (move_spills) {
                                /* an explicit spill is only requested for values that
                                 * are used again and were not spilled before */
                                if (!USES_IS_INFINITE(ws->vals[i].time)
                                                && !ws->vals[i].spilled) {
                                        ir_node *after_pos = sched_prev(instr);
                                        DB((dbg, DBG_DECIDE, "Spill %+F after node %+F\n", val,
                                                after_pos));
                                        be_add_spill(senv, val, after_pos);
                                }
                        }
                }

                /* kill the last 'spills_needed' entries in the array */
                workset_set_length(ws, len - spills_needed);
        }

        /* 3. Insert the new values into the workset */
        for (i = 0; i < demand; ++i) {
                ir_node *val = to_insert[i];

                workset_insert(ws, val, spilled[i]);
        }
}
401
/**
 * Availability of a value in the end worksets of a block's predecessors,
 * as computed by available_in_all_preds().
 */
enum {
        AVAILABLE_EVERYWHERE, /**< contained in every predecessor end workset */
        AVAILABLE_NOWHERE,    /**< contained in no predecessor end workset */
        AVAILABLE_PARTLY,     /**< contained in some, but not all of them */
        AVAILABLE_UNKNOWN     /**< used by callers when not all predecessors
                                   have been processed yet */
};
408
409 static unsigned available_in_all_preds(workset_t* const* pred_worksets,
410                                        size_t n_pred_worksets,
411                                        const ir_node *value, bool is_local_phi)
412 {
413         size_t i;
414         bool   avail_everywhere = true;
415         bool   avail_nowhere    = true;
416
417         assert(n_pred_worksets > 0);
418
419         /* value available in all preds? */
420         for (i = 0; i < n_pred_worksets; ++i) {
421                 bool             found     = false;
422                 const workset_t *p_workset = pred_worksets[i];
423                 int              p_len     = workset_get_length(p_workset);
424                 int              p_i;
425                 const ir_node   *l_value;
426
427                 if (is_local_phi) {
428                         assert(is_Phi(value));
429                         l_value = get_irn_n(value, i);
430                 } else {
431                         l_value = value;
432                 }
433
434                 for (p_i = 0; p_i < p_len; ++p_i) {
435                         const loc_t *p_l = &p_workset->vals[p_i];
436                         if (p_l->node != l_value)
437                                 continue;
438
439                         found = true;
440                         break;
441                 }
442
443                 if (found) {
444                         avail_nowhere = false;
445                 } else {
446                         avail_everywhere = false;
447                 }
448         }
449
450         if (avail_everywhere) {
451                 assert(!avail_nowhere);
452                 return AVAILABLE_EVERYWHERE;
453         } else if (avail_nowhere) {
454                 return AVAILABLE_NOWHERE;
455         } else {
456                 return AVAILABLE_PARTLY;
457         }
458 }
459
460 /** Decides whether a specific node should be in the start workset or not
461  *
462  * @param env      belady environment
463  * @param first
464  * @param node     the node to test
465  * @param loop     the loop of the node
466  */
467 static loc_t to_take_or_not_to_take(ir_node* first, ir_node *node,
468                                     ir_loop *loop, unsigned available)
469 {
470         be_next_use_t next_use;
471         loc_t         loc;
472
473         loc.time    = USES_INFINITY;
474         loc.node    = node;
475         loc.spilled = false;
476
477         if (!arch_irn_consider_in_reg_alloc(cls, node)) {
478                 loc.time = USES_INFINITY;
479                 return loc;
480         }
481
482         /* We have to keep nonspillable nodes in the workingset */
483         if (arch_get_irn_flags(skip_Proj_const(node)) & arch_irn_flags_dont_spill) {
484                 loc.time = 0;
485                 DB((dbg, DBG_START, "    %+F taken (dontspill node)\n", node, loc.time));
486                 return loc;
487         }
488
489         next_use = be_get_next_use(uses, first, node, 0);
490         if (USES_IS_INFINITE(next_use.time)) {
491                 /* the nodes marked as live in shouldn't be dead, so it must be a phi */
492                 assert(is_Phi(node));
493                 loc.time = USES_INFINITY;
494                 DB((dbg, DBG_START, "    %+F not taken (dead)\n", node));
495                 return loc;
496         }
497
498         loc.time = next_use.time;
499
500         if (improve_known_preds) {
501                 if (available == AVAILABLE_EVERYWHERE) {
502                         DB((dbg, DBG_START, "    %+F taken (%u, live in all preds)\n",
503                             node, loc.time));
504                         return loc;
505                 } else if (available == AVAILABLE_NOWHERE) {
506                         DB((dbg, DBG_START, "    %+F not taken (%u, live in no pred)\n",
507                             node, loc.time));
508                         loc.time = USES_INFINITY;
509                         return loc;
510                 }
511         }
512
513         if (!respectloopdepth || next_use.outermost_loop >= get_loop_depth(loop)) {
514                 DB((dbg, DBG_START, "    %+F taken (%u, loop %d)\n", node, loc.time,
515                     next_use.outermost_loop));
516         } else {
517                 loc.time = USES_PENDING;
518                 DB((dbg, DBG_START, "    %+F delayed (outerdepth %d < loopdepth %d)\n",
519                     node, next_use.outermost_loop, get_loop_depth(loop)));
520         }
521
522         return loc;
523 }
524
/**
 * Computes the start-workset for a block with multiple predecessors. We assume
 * that at least 1 of the predecessors is a back-edge which means we're at the
 * beginning of a loop. We try to reload as many values as possible now so they
 * don't get reloaded inside the loop.
 *
 * The result is written to the global workset ws. Phis of this block that do
 * not make it into the start workset are handed to be_spill_phi().
 *
 * @param block  the block to compute the start workset for
 */
static void decide_start_workset(const ir_node *block)
{
        ir_loop    *loop = get_irn_loop(block);
        ir_node    *first;
        ir_node    *node;
        loc_t       loc;
        loc_t      *starters;
        loc_t      *delayed;
        unsigned    len;
        unsigned    i;
        int         in;
        unsigned    ws_count;
        int         free_slots, free_pressure_slots;
        unsigned    pressure;
        int         arity;
        workset_t **pred_worksets;
        bool        all_preds_known;

        /* check predecessors: collect their end worksets; a predecessor without
         * block info has not been processed yet and gets a NULL entry */
        arity           = get_irn_arity(block);
        pred_worksets   = ALLOCAN(workset_t*, arity);
        all_preds_known = true;
        for (in = 0; in < arity; ++in) {
                ir_node      *pred_block = get_Block_cfgpred_block(block, in);
                block_info_t *pred_info  = get_block_info(pred_block);

                if (pred_info == NULL) {
                        pred_worksets[in] = NULL;
                        all_preds_known   = false;
                } else {
                        pred_worksets[in] = pred_info->end_workset;
                }
        }

        /* Collect all values living at start of block:
         * starters = candidates taken right away,
         * delayed  = candidates only taken if there is room left */
        starters = NEW_ARR_F(loc_t, 0);
        delayed  = NEW_ARR_F(loc_t, 0);

        DB((dbg, DBG_START, "Living at start of %+F:\n", block));
        first = sched_first(block);

        /* check all Phis first (the loop stops at the first non-Phi) */
        sched_foreach(block, node) {
                unsigned available;

                if (! is_Phi(node))
                        break;
                if (!arch_irn_consider_in_reg_alloc(cls, node))
                        continue;

                /* availability can only be judged if every predecessor has an
                 * end workset */
                if (all_preds_known) {
                        available = available_in_all_preds(pred_worksets, arity, node, true);
                } else {
                        available = AVAILABLE_UNKNOWN;
                }

                loc = to_take_or_not_to_take(first, node, loop, available);

                if (! USES_IS_INFINITE(loc.time)) {
                        if (USES_IS_PENDING(loc.time))
                                ARR_APP1(loc_t, delayed, loc);
                        else
                                ARR_APP1(loc_t, starters, loc);
                } else {
                        /* a Phi that is not taken is spilled as a whole */
                        be_spill_phi(senv, node);
                }
        }

        /* check all Live-Ins */
        be_lv_foreach(lv, block, be_lv_state_in, in) {
                ir_node *node = be_lv_get_irn(lv, block, in);
                unsigned available;

                if (all_preds_known) {
                        available = available_in_all_preds(pred_worksets, arity, node, false);
                } else {
                        available = AVAILABLE_UNKNOWN;
                }

                loc = to_take_or_not_to_take(first, node, loop, available);

                if (! USES_IS_INFINITE(loc.time)) {
                        if (USES_IS_PENDING(loc.time))
                                ARR_APP1(loc_t, delayed, loc);
                        else
                                ARR_APP1(loc_t, starters, loc);
                }
        }

        /* number of slots that may be handed to delayed values: limited by
         * the room left next to the starters and by the loop pressure with the
         * delayed values subtracted (the assert expects them to be part of
         * that pressure) */
        pressure            = be_get_loop_pressure(loop_ana, cls, loop);
        assert(ARR_LEN(delayed) <= pressure);
        free_slots          = n_regs - ARR_LEN(starters);
        free_pressure_slots = n_regs - (pressure - ARR_LEN(delayed));
        free_slots          = MIN(free_slots, free_pressure_slots);

        /* so far we only put nodes into the starters list that are used inside
         * the loop. If register pressure in the loop is low then we can take some
         * values and let them live through the loop */
        DB((dbg, DBG_START, "Loop pressure %d, taking %d delayed vals\n",
            pressure, free_slots));
        if (free_slots > 0) {
                size_t i;

                qsort(delayed, ARR_LEN(delayed), sizeof(delayed[0]), loc_compare);

                for (i = 0; i < ARR_LEN(delayed) && free_slots > 0; ++i) {
                        int    p, arity;
                        loc_t *loc = & delayed[i];

                        if (!is_Phi(loc->node)) {
                                /* don't use values which are dead in a known predecessors
                                 * to not induce unnecessary reloads */
                                arity = get_irn_arity(block);
                                for (p = 0; p < arity; ++p) {
                                        ir_node      *pred_block = get_Block_cfgpred_block(block, p);
                                        block_info_t *pred_info  = get_block_info(pred_block);

                                        if (pred_info == NULL)
                                                continue;

                                        if (!workset_contains(pred_info->end_workset, loc->node)) {
                                                DB((dbg, DBG_START,
                                                        "    delayed %+F not live at pred %+F\n", loc->node,
                                                        pred_block));
                                                goto skip_delayed;
                                        }
                                }
                        }

                        DB((dbg, DBG_START, "    delayed %+F taken\n", loc->node));
                        ARR_APP1(loc_t, starters, *loc);
                        /* mark the entry as consumed for the Phi-spilling loop below */
                        loc->node = NULL;
                        --free_slots;
                skip_delayed:
                        ;
                }
        }

        /* spill phis (the actual phis not just their values) that are in this block
         * but were delayed and not promoted to starters above */
        len = ARR_LEN(delayed);
        for (i = 0; i < len; ++i) {
                ir_node *node = delayed[i].node;
                if (node == NULL || !is_Phi(node) || get_nodes_block(node) != block)
                        continue;

                DB((dbg, DBG_START, "    spilling delayed phi %+F\n", node));
                be_spill_phi(senv, node);
        }
        DEL_ARR_F(delayed);

        /* Sort start values by first use */
        qsort(starters, ARR_LEN(starters), sizeof(starters[0]), loc_compare);

        /* Copy the best ones from starters to start workset */
        ws_count = MIN((unsigned) ARR_LEN(starters), n_regs);
        workset_clear(ws);
        workset_bulk_fill(ws, ws_count, starters);

        /* spill phis (the actual phis not just their values) that are in this block
         * but not in the start workset: these are the starters beyond ws_count */
        len = ARR_LEN(starters);
        for (i = ws_count; i < len; ++i) {
                ir_node *node = starters[i].node;
                if (! is_Phi(node) || get_nodes_block(node) != block)
                        continue;

                DB((dbg, DBG_START, "    spilling phi %+F\n", node));
                be_spill_phi(senv, node);
        }

        DEL_ARR_F(starters);

        /* determine spill status of the values: If there's 1 pred block (which
         * is no backedge) where the value is spilled then we must set it to
         * spilled here. */
        for (i = 0; i < ws_count; ++i) {
                loc_t   *loc     = &ws->vals[i];
                ir_node *value   = loc->node;
                bool     spilled;
                int      n;

                /* phis from this block aren't spilled */
                if (get_nodes_block(value) == block) {
                        assert(is_Phi(value));
                        loc->spilled = false;
                        continue;
                }

                /* determine if value was spilled on any predecessor */
                spilled = false;
                for (n = 0; n < arity; ++n) {
                        workset_t *pred_workset = pred_worksets[n];
                        int        p_len;
                        int        p;

                        /* predecessor not processed yet: nothing known */
                        if (pred_workset == NULL)
                                continue;

                        p_len = workset_get_length(pred_workset);
                        for (p = 0; p < p_len; ++p) {
                                loc_t *l = &pred_workset->vals[p];

                                if (l->node != value)
                                        continue;

                                if (l->spilled) {
                                        spilled = true;
                                }
                                break;
                        }
                }

                loc->spilled = spilled;
        }
}
747
/**
 * For the given block @p block, decide for each value
 * whether it is used from a register or is reloaded
 * before the use.
 *
 * Builds the start workset in the global ws, walks the schedule of the block
 * calling displace() for the operands and the results of every instruction,
 * and records copies of the start and end workset in a new block_info_t
 * attached to the block.
 *
 * @param block  the block to process; it must not have a block info yet
 */
static void process_block(ir_node *block)
{
        workset_t    *new_vals;
        ir_node      *irn;
        unsigned      iter;
        block_info_t *block_info;
        int           arity;

        /* no need to process a block twice */
        assert(get_block_info(block) == NULL);

        /* construct start workset */
        arity = get_Block_n_cfgpreds(block);
        if (arity == 0) {
                /* no predecessor -> empty set */
                workset_clear(ws);
        } else if (arity == 1) {
                /* one predecessor, copy its end workset */
                ir_node      *pred_block = get_Block_cfgpred_block(block, 0);
                block_info_t *pred_info  = get_block_info(pred_block);

                /* the single predecessor must have been processed already */
                assert(pred_info != NULL);
                workset_copy(ws, pred_info->end_workset);
        } else {
                /* multiple predecessors, do more advanced magic :) */
                decide_start_workset(block);
        }

        DB((dbg, DBG_DECIDE, "\n"));
        DB((dbg, DBG_DECIDE, "Decide for %+F\n", block));

        /* attach the info only now: decide_start_workset() looks at the block
         * infos of the predecessors, which may include this block itself */
        block_info = new_block_info();
        set_block_info(block, block_info);

        DB((dbg, DBG_WSETS, "Start workset for %+F:\n", block));
        workset_foreach(ws, irn, iter) {
                DB((dbg, DBG_WSETS, "  %+F (%u)\n", irn,
                     workset_get_time(ws, iter)));
        }

        block_info->start_workset = workset_clone(ws);

        /* process the block from start to end */
        DB((dbg, DBG_WSETS, "Processing...\n"));
        /* TODO: this leaks (into the obstack)... */
        new_vals = new_workset();

        sched_foreach(block, irn) {
                int i, arity;
                ir_node *value;
                assert(workset_get_length(ws) <= n_regs);

                /* Phis are no real instructions and are skipped here */
                if (is_Phi(irn)) {
                        continue;
                }
                DB((dbg, DBG_DECIDE, "  ...%+F\n", irn));

                /* set instruction in the workset (displace() reads the global) */
                instr = irn;

                /* allocate all values _used_ by this instruction */
                workset_clear(new_vals);
                for (i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
                        ir_node *in = get_irn_n(irn, i);
                        if (!arch_irn_consider_in_reg_alloc(cls, in))
                                continue;

                        /* (note that "spilled" is irrelevant here) */
                        workset_insert(new_vals, in, false);
                }
                displace(new_vals, 1);

                /* allocate all values _defined_ by this instruction */
                workset_clear(new_vals);
                be_foreach_definition(irn, cls, value,
                        assert(req_->width == 1);
                        workset_insert(new_vals, value, false);
                );
                displace(new_vals, 0);
        }

        /* Remember end-workset for this block */
        block_info->end_workset = workset_clone(ws);
        DB((dbg, DBG_WSETS, "End workset for %+F:\n", block));
        workset_foreach(ws, irn, iter)
                DB((dbg, DBG_WSETS, "  %+F (%u)\n", irn, workset_get_time(ws, iter)));
}
841
842 /**
843  * 'decide' is block-local and makes assumptions
844  * about the set of live-ins. Thus we must adapt the
845  * live-outs to the live-ins at each block-border.
846  */
847 static void fix_block_borders(ir_node *block, void *data)
848 {
849         workset_t *start_workset;
850         int        arity;
851         int        i;
852         unsigned   iter;
853         (void) data;
854
855         DB((dbg, DBG_FIX, "\n"));
856         DB((dbg, DBG_FIX, "Fixing %+F\n", block));
857
858         arity = get_irn_arity(block);
859         /* can happen for endless loops */
860         if (arity == 0)
861                 return;
862
863         start_workset = get_block_info(block)->start_workset;
864
865         /* process all pred blocks */
866         for (i = 0; i < arity; ++i) {
867                 ir_node   *pred = get_Block_cfgpred_block(block, i);
868                 workset_t *pred_end_workset = get_block_info(pred)->end_workset;
869                 ir_node   *node;
870
871                 DB((dbg, DBG_FIX, "  Pred %+F\n", pred));
872
873                 /* spill all values not used anymore */
874                 workset_foreach(pred_end_workset, node, iter) {
875                         ir_node *n2;
876                         unsigned iter2;
877                         bool     found = false;
878                         workset_foreach(start_workset, n2, iter2) {
879                                 if (n2 == node) {
880                                         found = true;
881                                         break;
882                                 }
883                                 /* note that we do not look at phi inputs, becuase the values
884                                  * will be either live-end and need no spill or
885                                  * they have other users in which must be somewhere else in the
886                                  * workset */
887                         }
888
889                         if (found)
890                                 continue;
891
892                         if (move_spills && be_is_live_in(lv, block, node)
893                                         && !pred_end_workset->vals[iter].spilled) {
894                                 ir_node *insert_point;
895                                 if (arity > 1) {
896                                         insert_point = be_get_end_of_block_insertion_point(pred);
897                                         insert_point = sched_prev(insert_point);
898                                 } else {
899                                         insert_point = block;
900                                 }
901                                 DB((dbg, DBG_SPILL, "Spill %+F after %+F\n", node,
902                                      insert_point));
903                                 be_add_spill(senv, node, insert_point);
904                         }
905                 }
906
907                 /* reload missing values in predecessors, add missing spills */
908                 workset_foreach(start_workset, node, iter) {
909                         const loc_t *l    = &start_workset->vals[iter];
910                         const loc_t *pred_loc;
911
912                         /* if node is a phi of the current block we reload
913                          * the corresponding argument, else node itself */
914                         if (is_Phi(node) && get_nodes_block(node) == block) {
915                                 node = get_irn_n(node, i);
916                                 assert(!l->spilled);
917
918                                 /* we might have unknowns as argument for the phi */
919                                 if (!arch_irn_consider_in_reg_alloc(cls, node))
920                                         continue;
921                         }
922
923                         /* check if node is in a register at end of pred */
924                         pred_loc = workset_contains(pred_end_workset, node);
925                         if (pred_loc != NULL) {
926                                 /* we might have to spill value on this path */
927                                 if (move_spills && !pred_loc->spilled && l->spilled) {
928                                         ir_node *insert_point
929                                                 = be_get_end_of_block_insertion_point(pred);
930                                         insert_point = sched_prev(insert_point);
931                                         DB((dbg, DBG_SPILL, "Spill %+F after %+F\n", node,
932                                             insert_point));
933                                         be_add_spill(senv, node, insert_point);
934                                 }
935                         } else {
936                                 /* node is not in register at the end of pred -> reload it */
937                                 DB((dbg, DBG_FIX, "    reload %+F\n", node));
938                                 DB((dbg, DBG_SPILL, "Reload %+F before %+F,%d\n", node, block, i));
939                                 be_add_reload_on_edge(senv, node, block, i, cls, 1);
940                         }
941                 }
942         }
943 }
944
945 static void be_spill_belady(ir_graph *irg, const arch_register_class_t *rcls)
946 {
947         int i;
948
949         be_liveness_assure_sets(be_assure_liveness(irg));
950
951         stat_ev_tim_push();
952         assure_loopinfo(irg);
953         stat_ev_tim_pop("belady_time_backedges");
954
955         stat_ev_tim_push();
956         be_clear_links(irg);
957         stat_ev_tim_pop("belady_time_clear_links");
958
959         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
960
961         /* init belady env */
962         stat_ev_tim_push();
963         obstack_init(&obst);
964         cls       = rcls;
965         lv        = be_get_irg_liveness(irg);
966         n_regs    = be_get_n_allocatable_regs(irg, cls);
967         ws        = new_workset();
968         uses      = be_begin_uses(irg, lv);
969         loop_ana  = be_new_loop_pressure(irg, cls);
970         senv      = be_new_spill_env(irg);
971         blocklist = be_get_cfgpostorder(irg);
972         stat_ev_tim_pop("belady_time_init");
973
974         stat_ev_tim_push();
975         /* walk blocks in reverse postorder */
976         for (i = ARR_LEN(blocklist) - 1; i >= 0; --i) {
977                 process_block(blocklist[i]);
978         }
979         DEL_ARR_F(blocklist);
980         stat_ev_tim_pop("belady_time_belady");
981
982         stat_ev_tim_push();
983         /* belady was block-local, fix the global flow by adding reloads on the
984          * edges */
985         irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);
986         stat_ev_tim_pop("belady_time_fix_borders");
987
988         ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
989
990         /* Insert spill/reload nodes into the graph and fix usages */
991         be_insert_spills_reloads(senv);
992
993         /* clean up */
994         be_delete_spill_env(senv);
995         be_end_uses(uses);
996         be_free_loop_pressure(loop_ana);
997         obstack_free(&obst, NULL);
998 }
999
1000 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady)
1001 void be_init_spillbelady(void)
1002 {
1003         static be_spiller_t belady_spiller = {
1004                 be_spill_belady
1005         };
1006         lc_opt_entry_t *be_grp       = lc_opt_get_grp(firm_opt_get_root(), "be");
1007         lc_opt_entry_t *belady_group = lc_opt_get_grp(be_grp, "belady");
1008         lc_opt_add_table(belady_group, options);
1009
1010         be_register_spiller("belady", &belady_spiller);
1011         FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady");
1012 }