sparc: remove old code which was only valid for old beabi handling
[libfirm] / ir / be / bespillbelady.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Beladys spillalgorithm.
23  * @author      Daniel Grund, Matthias Braun
24  * @date        20.09.2005
25  * @version     $Id$
26  */
27 #include "config.h"
28
29 #include <stdbool.h>
30
31 #include "obst.h"
32 #include "irprintf_t.h"
33 #include "irgraph.h"
34 #include "irnode.h"
35 #include "irmode.h"
36 #include "irgwalk.h"
37 #include "irloop.h"
38 #include "iredges_t.h"
39 #include "ircons_t.h"
40 #include "irprintf.h"
41 #include "irnodeset.h"
42
43 #include "beutil.h"
44 #include "bearch.h"
45 #include "beuses.h"
46 #include "besched.h"
47 #include "beirgmod.h"
48 #include "belive_t.h"
49 #include "benode.h"
50 #include "bechordal_t.h"
51 #include "bespill.h"
52 #include "beloopana.h"
53 #include "beirg.h"
54 #include "bespillutil.h"
55 #include "bemodule.h"
56
/* Debug mask bits; one of them is passed as the second argument of DB(). */
#define DBG_SPILL    (1 << 0)
#define DBG_WSETS    (1 << 1)
#define DBG_FIX      (1 << 2)
#define DBG_DECIDE   (1 << 3)
#define DBG_START    (1 << 4)
#define DBG_SLOTS    (1 << 5)
#define DBG_TRACE    (1 << 6)
#define DBG_WORKSET  (1 << 7)
65 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
66
/* Placeholder next-use time given to a workset entry when it is inserted. */
#define TIME_UNDEFINED (6666)
68
69 /**
70  * An association between a node and a point in time.
71  */
72 typedef struct loc_t {
73         ir_node          *node;
74         unsigned          time;     /**< A use time (see beuses.h). */
75         bool              spilled;  /**< the value was already spilled on this path */
76 } loc_t;
77
78 typedef struct _workset_t {
79         int   len;          /**< current length */
80         loc_t vals[0];      /**< inlined array of the values/distances in this working set */
81 } workset_t;
82
83 static struct obstack               obst;
84 static const arch_register_class_t *cls;
85 static const be_lv_t               *lv;
86 static be_loopana_t                *loop_ana;
87 static int                          n_regs;
88 static workset_t                   *ws;     /**< the main workset used while
89                                                      processing a block. */
90 static be_uses_t                   *uses;   /**< env for the next-use magic */
91 static ir_node                     *instr;  /**< current instruction */
92 static unsigned                     instr_nr; /**< current instruction number
93                                                        (relative to block start) */
94 static spill_env_t                 *senv;   /**< see bespill.h */
95 static ir_node                    **blocklist;
96
97 static int                          move_spills      = true;
98 static int                          respectloopdepth = true;
99 static int                          improve_known_preds = true;
100 /* factor to weight the different costs of reloading/rematerializing a node
101    (see bespill.h be_get_reload_costs_no_weight) */
102 static int                          remat_bonus      = 10;
103
104 static const lc_opt_table_entry_t options[] = {
105         LC_OPT_ENT_BOOL   ("movespills", "try to move spills out of loops", &move_spills),
106         LC_OPT_ENT_BOOL   ("respectloopdepth", "exprimental (outermost loop cutting)", &respectloopdepth),
107         LC_OPT_ENT_BOOL   ("improveknownpreds", "experimental (known preds cutting)", &improve_known_preds),
108         LC_OPT_ENT_INT    ("rematbonus", "give bonus to rematerialisable nodes", &remat_bonus),
109         LC_OPT_LAST
110 };
111
112 static int loc_compare(const void *a, const void *b)
113 {
114         const loc_t *p = a;
115         const loc_t *q = b;
116         return p->time - q->time;
117 }
118
119 /**
120  * Alloc a new workset on obstack @p ob with maximum size @p max
121  */
122 static workset_t *new_workset(void)
123 {
124         return OALLOCFZ(&obst, workset_t, vals, n_regs);
125 }
126
127 /**
128  * Alloc a new instance on obstack and make it equal to @param workset
129  */
130 static workset_t *workset_clone(workset_t *workset)
131 {
132         workset_t *res = OALLOCF(&obst, workset_t, vals, n_regs);
133         memcpy(res, workset, sizeof(*res) + n_regs * sizeof(res->vals[0]));
134         return res;
135 }
136
137 /**
138  * Copy workset @param src to @param tgt
139  */
140 static void workset_copy(workset_t *dest, const workset_t *src)
141 {
142         size_t size = sizeof(*src) + n_regs * sizeof(src->vals[0]);
143         memcpy(dest, src, size);
144 }
145
146 /**
147  * Overwrites the current content array of @param ws with the
148  * @param count locations given at memory @param locs.
149  * Set the length of @param ws to count.
150  */
151 static void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs)
152 {
153         workset->len = count;
154         memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
155 }
156
157 /**
158  * Inserts the value @p val into the workset, iff it is not
159  * already contained. The workset must not be full.
160  */
161 static void workset_insert(workset_t *workset, ir_node *val, bool spilled)
162 {
163         loc_t *loc;
164         int    i;
165         /* check for current regclass */
166         assert(arch_irn_consider_in_reg_alloc(cls, val));
167
168         /* check if val is already contained */
169         for (i = 0; i < workset->len; ++i) {
170                 loc = &workset->vals[i];
171                 if (loc->node == val) {
172                         if (spilled) {
173                                 loc->spilled = true;
174                         }
175                         return;
176                 }
177         }
178
179         /* insert val */
180         assert(workset->len < n_regs && "Workset already full!");
181         loc           = &workset->vals[workset->len];
182         loc->node     = val;
183         loc->spilled  = spilled;
184         loc->time     = TIME_UNDEFINED;
185         workset->len++;
186 }
187
188 /**
189  * Removes all entries from this workset
190  */
191 static void workset_clear(workset_t *workset)
192 {
193         workset->len = 0;
194 }
195
196 /**
197  * Removes the value @p val from the workset if present.
198  */
199 static inline void workset_remove(workset_t *workset, ir_node *val)
200 {
201         int i;
202         for (i = 0; i < workset->len; ++i) {
203                 if (workset->vals[i].node == val) {
204                         workset->vals[i] = workset->vals[--workset->len];
205                         return;
206                 }
207         }
208 }
209
210 static inline const loc_t *workset_contains(const workset_t *ws,
211                                             const ir_node *val)
212 {
213         int i;
214
215         for (i = 0; i < ws->len; ++i) {
216                 if (ws->vals[i].node == val)
217                         return &ws->vals[i];
218         }
219
220         return NULL;
221 }
222
/**
 * Iterates over all values in the working set.
 * @p ws The workset to iterate (any expression yielding a workset pointer)
 * @p v  A variable to put the current value in; it is NULL once the loop
 *       ran to completion
 * @p i  An integer variable for internal use (index of the current entry)
 *
 * Note that @p ws and @p i are evaluated several times per iteration, so
 * they must not have side effects.
 */
#define workset_foreach(ws, v, i) \
	for ((i) = 0; \
	     (v) = ((i) < (ws)->len) ? (ws)->vals[(i)].node : NULL, (i) < (ws)->len; \
	     ++(i))

/* Accessors for single workset entries and the workset length. Arguments
 * and expansions are fully parenthesized so that the macros behave like
 * ordinary expressions wherever they are used. */
#define workset_set_time(ws, i, t)     ((ws)->vals[(i)].time = (t))
#define workset_get_time(ws, i)        ((ws)->vals[(i)].time)
#define workset_set_length(ws, length) ((ws)->len = (length))
#define workset_get_length(ws)         ((ws)->len)
#define workset_get_val(ws, i)         ((ws)->vals[(i)].node)
/* Sorts the entries with loc_compare (ascending time). */
#define workset_sort(ws) do { qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare); } while(0)
239
240 typedef struct _block_info_t
241 {
242         workset_t *start_workset;
243         workset_t *end_workset;
244 } block_info_t;
245
246
247 static block_info_t *new_block_info(void)
248 {
249         return OALLOCZ(&obst, block_info_t);
250 }
251
/* The block info is kept in the link field of the block node. */
#define get_block_info(block)        ((block_info_t *)get_irn_link((block)))
#define set_block_info(block, info)  set_irn_link((block), (info))
254
255 /**
256  * @return The distance to the next use or 0 if irn has dont_spill flag set
257  */
258 static inline unsigned get_distance(ir_node *from, unsigned from_step,
259                                     const ir_node *def, int skip_from_uses)
260 {
261         be_next_use_t use;
262         unsigned      costs;
263         unsigned      time;
264
265         assert(!arch_irn_is_ignore(def));
266
267         use  = be_get_next_use(uses, from, from_step, def, skip_from_uses);
268         time = use.time;
269         if (USES_IS_INFINITE(time))
270                 return USES_INFINITY;
271
272         /* We have to keep nonspillable nodes in the workingset */
273         if (arch_irn_get_flags(skip_Proj_const(def)) & arch_irn_flags_dont_spill)
274                 return 0;
275
276         /* give some bonus to rematerialisable nodes */
277         if (remat_bonus > 0) {
278                 costs = be_get_reload_costs_no_weight(senv, def, use.before);
279                 assert(costs * remat_bonus < 1000);
280                 time  += 1000 - (costs * remat_bonus);
281         }
282
283         return time;
284 }
285
286 /**
287  * Performs the actions necessary to grant the request that:
288  * - new_vals can be held in registers
289  * - as few as possible other values are disposed
290  * - the worst values get disposed
291  *
292  * @p is_usage indicates that the values in new_vals are used (not defined)
293  * In this case reloads must be performed
294  */
295 static void displace(workset_t *new_vals, int is_usage)
296 {
297         ir_node **to_insert = ALLOCAN(ir_node*, n_regs);
298         bool     *spilled   = ALLOCAN(bool,     n_regs);
299         ir_node  *val;
300         int       i;
301         int       len;
302         int       spills_needed;
303         int       demand;
304         int       iter;
305
306         /* 1. Identify the number of needed slots and the values to reload */
307         demand = 0;
308         workset_foreach(new_vals, val, iter) {
309                 bool reloaded = false;
310
311                 if (! workset_contains(ws, val)) {
312                         DB((dbg, DBG_DECIDE, "    insert %+F\n", val));
313                         if (is_usage) {
314                                 DB((dbg, DBG_SPILL, "Reload %+F before %+F\n", val, instr));
315                                 be_add_reload(senv, val, instr, cls, 1);
316                                 reloaded = true;
317                         }
318                 } else {
319                         DB((dbg, DBG_DECIDE, "    %+F already in workset\n", val));
320                         assert(is_usage);
321                         /* remove the value from the current workset so it is not accidently
322                          * spilled */
323                         workset_remove(ws, val);
324                 }
325                 spilled[demand]   = reloaded;
326                 to_insert[demand] = val;
327                 ++demand;
328         }
329
330         /* 2. Make room for at least 'demand' slots */
331         len           = workset_get_length(ws);
332         spills_needed = len + demand - n_regs;
333         assert(spills_needed <= len);
334
335         /* Only make more free room if we do not have enough */
336         if (spills_needed > 0) {
337                 DB((dbg, DBG_DECIDE, "    disposing %d values\n", spills_needed));
338
339                 /* calculate current next-use distance for live values */
340                 for (i = 0; i < len; ++i) {
341                         ir_node  *val  = workset_get_val(ws, i);
342                         unsigned  dist = get_distance(instr, instr_nr, val, !is_usage);
343                         workset_set_time(ws, i, dist);
344                 }
345
346                 /* sort entries by increasing nextuse-distance*/
347                 workset_sort(ws);
348
349                 for (i = len - spills_needed; i < len; ++i) {
350                         ir_node *val = ws->vals[i].node;
351
352                         DB((dbg, DBG_DECIDE, "    disposing node %+F (%u)\n", val,
353                              workset_get_time(ws, i)));
354
355                         if (move_spills) {
356                                 if (!USES_IS_INFINITE(ws->vals[i].time)
357                                                 && !ws->vals[i].spilled) {
358                                         ir_node *after_pos = sched_prev(instr);
359                                         DB((dbg, DBG_DECIDE, "Spill %+F after node %+F\n", val,
360                                                 after_pos));
361                                         be_add_spill(senv, val, after_pos);
362                                 }
363                         }
364                 }
365
366                 /* kill the last 'demand' entries in the array */
367                 workset_set_length(ws, len - spills_needed);
368         }
369
370         /* 3. Insert the new values into the workset */
371         for (i = 0; i < demand; ++i) {
372                 ir_node *val = to_insert[i];
373
374                 workset_insert(ws, val, spilled[i]);
375         }
376 }
377
/** Availability of a value in the end worksets of a block's predecessors. */
enum {
	AVAILABLE_EVERYWHERE = 0,  /**< found in every predecessor workset */
	AVAILABLE_NOWHERE    = 1,  /**< found in no predecessor workset */
	AVAILABLE_PARTLY     = 2,  /**< found in some, but not all of them */
	AVAILABLE_UNKNOWN    = 3   /**< used when not all predecessors are known */
};
384
385 static unsigned available_in_all_preds(workset_t* const* pred_worksets,
386                                        size_t n_pred_worksets,
387                                        const ir_node *value, bool is_local_phi)
388 {
389         size_t i;
390         bool   avail_everywhere = true;
391         bool   avail_nowhere    = true;
392
393         assert(n_pred_worksets > 0);
394
395         /* value available in all preds? */
396         for (i = 0; i < n_pred_worksets; ++i) {
397                 bool             found     = false;
398                 const workset_t *p_workset = pred_worksets[i];
399                 int              p_len     = workset_get_length(p_workset);
400                 int              p_i;
401                 const ir_node   *l_value;
402
403                 if (is_local_phi) {
404                         assert(is_Phi(value));
405                         l_value = get_irn_n(value, i);
406                 } else {
407                         l_value = value;
408                 }
409
410                 for (p_i = 0; p_i < p_len; ++p_i) {
411                         const loc_t *p_l = &p_workset->vals[p_i];
412                         if (p_l->node != l_value)
413                                 continue;
414
415                         found = true;
416                         break;
417                 }
418
419                 if (found) {
420                         avail_nowhere = false;
421                 } else {
422                         avail_everywhere = false;
423                 }
424         }
425
426         if (avail_everywhere) {
427                 assert(!avail_nowhere);
428                 return AVAILABLE_EVERYWHERE;
429         } else if (avail_nowhere) {
430                 return AVAILABLE_NOWHERE;
431         } else {
432                 return AVAILABLE_PARTLY;
433         }
434 }
435
436 /** Decides whether a specific node should be in the start workset or not
437  *
438  * @param env      belady environment
439  * @param first
440  * @param node     the node to test
441  * @param loop     the loop of the node
442  */
443 static loc_t to_take_or_not_to_take(ir_node* first, ir_node *node,
444                                     ir_loop *loop, unsigned available)
445 {
446         be_next_use_t next_use;
447         loc_t         loc;
448
449         loc.time    = USES_INFINITY;
450         loc.node    = node;
451         loc.spilled = false;
452
453         if (!arch_irn_consider_in_reg_alloc(cls, node)) {
454                 loc.time = USES_INFINITY;
455                 return loc;
456         }
457
458         /* We have to keep nonspillable nodes in the workingset */
459         if (arch_irn_get_flags(skip_Proj_const(node)) & arch_irn_flags_dont_spill) {
460                 loc.time = 0;
461                 DB((dbg, DBG_START, "    %+F taken (dontspill node)\n", node, loc.time));
462                 return loc;
463         }
464
465         next_use = be_get_next_use(uses, first, 0, node, 0);
466         if (USES_IS_INFINITE(next_use.time)) {
467                 // the nodes marked as live in shouldn't be dead, so it must be a phi
468                 assert(is_Phi(node));
469                 loc.time = USES_INFINITY;
470                 DB((dbg, DBG_START, "    %+F not taken (dead)\n", node));
471                 return loc;
472         }
473
474         loc.time = next_use.time;
475
476         if (improve_known_preds) {
477                 if (available == AVAILABLE_EVERYWHERE) {
478                         DB((dbg, DBG_START, "    %+F taken (%u, live in all preds)\n",
479                             node, loc.time));
480                         return loc;
481                 } else if (available == AVAILABLE_NOWHERE) {
482                         DB((dbg, DBG_START, "    %+F not taken (%u, live in no pred)\n",
483                             node, loc.time));
484                         loc.time = USES_INFINITY;
485                         return loc;
486                 }
487         }
488
489         if (!respectloopdepth || next_use.outermost_loop >= get_loop_depth(loop)) {
490                 DB((dbg, DBG_START, "    %+F taken (%u, loop %d)\n", node, loc.time,
491                     next_use.outermost_loop));
492         } else {
493                 loc.time = USES_PENDING;
494                 DB((dbg, DBG_START, "    %+F delayed (outerdepth %d < loopdepth %d)\n",
495                     node, next_use.outermost_loop, get_loop_depth(loop)));
496         }
497
498         return loc;
499 }
500
501 /**
502  * Computes the start-workset for a block with multiple predecessors. We assume
503  * that at least 1 of the predeccesors is a back-edge which means we're at the
504  * beginning of a loop. We try to reload as much values as possible now so they
505  * don't get reloaded inside the loop.
506  */
507 static void decide_start_workset(const ir_node *block)
508 {
509         ir_loop    *loop = get_irn_loop(block);
510         ir_node    *first;
511         ir_node    *node;
512         loc_t       loc;
513         loc_t      *starters;
514         loc_t      *delayed;
515         int         i, len, ws_count;
516         int             free_slots, free_pressure_slots;
517         unsigned    pressure;
518         int         arity;
519         workset_t **pred_worksets;
520         bool        all_preds_known;
521
522         /* check predecessors */
523         arity           = get_irn_arity(block);
524         pred_worksets   = ALLOCAN(workset_t*, arity);
525         all_preds_known = true;
526         for (i = 0; i < arity; ++i) {
527                 ir_node      *pred_block = get_Block_cfgpred_block(block, i);
528                 block_info_t *pred_info  = get_block_info(pred_block);
529
530                 if (pred_info == NULL) {
531                         pred_worksets[i] = NULL;
532                         all_preds_known  = false;
533                 } else {
534                         pred_worksets[i] = pred_info->end_workset;
535                 }
536         }
537
538         /* Collect all values living at start of block */
539         starters = NEW_ARR_F(loc_t, 0);
540         delayed  = NEW_ARR_F(loc_t, 0);
541
542         DB((dbg, DBG_START, "Living at start of %+F:\n", block));
543         first = sched_first(block);
544
545         /* check all Phis first */
546         sched_foreach(block, node) {
547                 unsigned available;
548
549                 if (! is_Phi(node))
550                         break;
551                 if (!arch_irn_consider_in_reg_alloc(cls, node))
552                         continue;
553
554                 if (all_preds_known) {
555                         available = available_in_all_preds(pred_worksets, arity, node, true);
556                 } else {
557                         available = AVAILABLE_UNKNOWN;
558                 }
559
560                 loc = to_take_or_not_to_take(first, node, loop, available);
561
562                 if (! USES_IS_INFINITE(loc.time)) {
563                         if (USES_IS_PENDING(loc.time))
564                                 ARR_APP1(loc_t, delayed, loc);
565                         else
566                                 ARR_APP1(loc_t, starters, loc);
567                 } else {
568                         be_spill_phi(senv, node);
569                 }
570         }
571
572         /* check all Live-Ins */
573         be_lv_foreach(lv, block, be_lv_state_in, i) {
574                 ir_node *node = be_lv_get_irn(lv, block, i);
575                 unsigned available;
576
577                 if (all_preds_known) {
578                         available = available_in_all_preds(pred_worksets, arity, node, false);
579                 } else {
580                         available = AVAILABLE_UNKNOWN;
581                 }
582
583                 loc = to_take_or_not_to_take(first, node, loop, available);
584
585                 if (! USES_IS_INFINITE(loc.time)) {
586                         if (USES_IS_PENDING(loc.time))
587                                 ARR_APP1(loc_t, delayed, loc);
588                         else
589                                 ARR_APP1(loc_t, starters, loc);
590                 }
591         }
592
593         pressure            = be_get_loop_pressure(loop_ana, cls, loop);
594         assert(ARR_LEN(delayed) <= (signed)pressure);
595         free_slots          = n_regs - ARR_LEN(starters);
596         free_pressure_slots = n_regs - (pressure - ARR_LEN(delayed));
597         free_slots          = MIN(free_slots, free_pressure_slots);
598
599         /* so far we only put nodes into the starters list that are used inside
600          * the loop. If register pressure in the loop is low then we can take some
601          * values and let them live through the loop */
602         DB((dbg, DBG_START, "Loop pressure %d, taking %d delayed vals\n",
603             pressure, free_slots));
604         if (free_slots > 0) {
605                 qsort(delayed, ARR_LEN(delayed), sizeof(delayed[0]), loc_compare);
606
607                 for (i = 0; i < ARR_LEN(delayed) && free_slots > 0; ++i) {
608                         int    p, arity;
609                         loc_t *loc = & delayed[i];
610
611                         if (!is_Phi(loc->node)) {
612                                 /* don't use values which are dead in a known predecessors
613                                  * to not induce unnecessary reloads */
614                                 arity = get_irn_arity(block);
615                                 for (p = 0; p < arity; ++p) {
616                                         ir_node      *pred_block = get_Block_cfgpred_block(block, p);
617                                         block_info_t *pred_info  = get_block_info(pred_block);
618
619                                         if (pred_info == NULL)
620                                                 continue;
621
622                                         if (!workset_contains(pred_info->end_workset, loc->node)) {
623                                                 DB((dbg, DBG_START,
624                                                         "    delayed %+F not live at pred %+F\n", loc->node,
625                                                         pred_block));
626                                                 goto skip_delayed;
627                                         }
628                                 }
629                         }
630
631                         DB((dbg, DBG_START, "    delayed %+F taken\n", loc->node));
632                         ARR_APP1(loc_t, starters, *loc);
633                         loc->node = NULL;
634                         --free_slots;
635                 skip_delayed:
636                         ;
637                 }
638         }
639
640         /* spill phis (the actual phis not just their values) that are in this block
641          * but not in the start workset */
642         for (i = ARR_LEN(delayed) - 1; i >= 0; --i) {
643                 ir_node *node = delayed[i].node;
644                 if (node == NULL || !is_Phi(node) || get_nodes_block(node) != block)
645                         continue;
646
647                 DB((dbg, DBG_START, "    spilling delayed phi %+F\n", node));
648                 be_spill_phi(senv, node);
649         }
650         DEL_ARR_F(delayed);
651
652         /* Sort start values by first use */
653         qsort(starters, ARR_LEN(starters), sizeof(starters[0]), loc_compare);
654
655         /* Copy the best ones from starters to start workset */
656         ws_count = MIN(ARR_LEN(starters), n_regs);
657         workset_clear(ws);
658         workset_bulk_fill(ws, ws_count, starters);
659
660         /* spill phis (the actual phis not just their values) that are in this block
661          * but not in the start workset */
662         len = ARR_LEN(starters);
663         for (i = ws_count; i < len; ++i) {
664                 ir_node *node = starters[i].node;
665                 if (! is_Phi(node) || get_nodes_block(node) != block)
666                         continue;
667
668                 DB((dbg, DBG_START, "    spilling phi %+F\n", node));
669                 be_spill_phi(senv, node);
670         }
671
672         DEL_ARR_F(starters);
673
674         /* determine spill status of the values: If there's 1 pred block (which
675          * is no backedge) where the value is spilled then we must set it to
676          * spilled here. */
677         for (i = 0; i < ws_count; ++i) {
678                 loc_t   *loc     = &ws->vals[i];
679                 ir_node *value   = loc->node;
680                 bool     spilled;
681                 int      n;
682
683                 /* phis from this block aren't spilled */
684                 if (get_nodes_block(value) == block) {
685                         assert(is_Phi(value));
686                         loc->spilled = false;
687                         continue;
688                 }
689
690                 /* determine if value was spilled on any predecessor */
691                 spilled = false;
692                 for (n = 0; n < arity; ++n) {
693                         workset_t *pred_workset = pred_worksets[n];
694                         int        p_len;
695                         int        p;
696
697                         if (pred_workset == NULL)
698                                 continue;
699
700                         p_len = workset_get_length(pred_workset);
701                         for (p = 0; p < p_len; ++p) {
702                                 loc_t *l = &pred_workset->vals[p];
703
704                                 if (l->node != value)
705                                         continue;
706
707                                 if (l->spilled) {
708                                         spilled = true;
709                                 }
710                                 break;
711                         }
712                 }
713
714                 loc->spilled = spilled;
715         }
716 }
717
718 /**
719  * For the given block @p block, decide for each values
720  * whether it is used from a register or is reloaded
721  * before the use.
722  */
723 static void process_block(ir_node *block)
724 {
725         workset_t       *new_vals;
726         ir_node         *irn;
727         int              iter;
728         block_info_t    *block_info;
729         int              arity;
730
731         /* no need to process a block twice */
732         assert(get_block_info(block) == NULL);
733
734         /* construct start workset */
735         arity = get_Block_n_cfgpreds(block);
736         if (arity == 0) {
737                 /* no predecessor -> empty set */
738                 workset_clear(ws);
739         } else if (arity == 1) {
740                 /* one predecessor, copy it's end workset */
741                 ir_node      *pred_block = get_Block_cfgpred_block(block, 0);
742                 block_info_t *pred_info  = get_block_info(pred_block);
743
744                 assert(pred_info != NULL);
745                 workset_copy(ws, pred_info->end_workset);
746         } else {
747                 /* multiple predecessors, do more advanced magic :) */
748                 decide_start_workset(block);
749         }
750
751         DB((dbg, DBG_DECIDE, "\n"));
752         DB((dbg, DBG_DECIDE, "Decide for %+F\n", block));
753
754         block_info = new_block_info();
755         set_block_info(block, block_info);
756
757         DB((dbg, DBG_WSETS, "Start workset for %+F:\n", block));
758         workset_foreach(ws, irn, iter) {
759                 DB((dbg, DBG_WSETS, "  %+F (%u)\n", irn,
760                      workset_get_time(ws, iter)));
761         }
762
763         block_info->start_workset = workset_clone(ws);
764
765         /* process the block from start to end */
766         DB((dbg, DBG_WSETS, "Processing...\n"));
767         instr_nr = 0;
768         /* TODO: this leaks (into the obstack)... */
769         new_vals = new_workset();
770
771         sched_foreach(block, irn) {
772                 int i, arity;
773                 ir_node *value;
774                 assert(workset_get_length(ws) <= n_regs);
775
776                 /* Phis are no real instr (see insert_starters()) */
777                 if (is_Phi(irn)) {
778                         continue;
779                 }
780                 DB((dbg, DBG_DECIDE, "  ...%+F\n", irn));
781
782                 /* set instruction in the workset */
783                 instr = irn;
784
785                 /* allocate all values _used_ by this instruction */
786                 workset_clear(new_vals);
787                 for (i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
788                         ir_node *in = get_irn_n(irn, i);
789                         if (!arch_irn_consider_in_reg_alloc(cls, in))
790                                 continue;
791
792                         /* (note that "spilled" is irrelevant here) */
793                         workset_insert(new_vals, in, false);
794                 }
795                 displace(new_vals, 1);
796
797                 /* allocate all values _defined_ by this instruction */
798                 workset_clear(new_vals);
799                 be_foreach_definition(irn, cls, value,
800                         workset_insert(new_vals, value, false);
801                 );
802                 displace(new_vals, 0);
803
804                 instr_nr++;
805         }
806
807         /* Remember end-workset for this block */
808         block_info->end_workset = workset_clone(ws);
809         DB((dbg, DBG_WSETS, "End workset for %+F:\n", block));
810         workset_foreach(ws, irn, iter)
811                 DB((dbg, DBG_WSETS, "  %+F (%u)\n", irn,
812                      workset_get_time(ws, iter)));
813 }
814
815 /**
816  * 'decide' is block-local and makes assumptions
817  * about the set of live-ins. Thus we must adapt the
818  * live-outs to the live-ins at each block-border.
819  */
820 static void fix_block_borders(ir_node *block, void *data)
821 {
822         workset_t    *start_workset;
823         int           arity;
824         int           i;
825         int           iter;
826         (void) data;
827
828         DB((dbg, DBG_FIX, "\n"));
829         DB((dbg, DBG_FIX, "Fixing %+F\n", block));
830
831         arity = get_irn_arity(block);
832         /* can happen for endless loops */
833         if (arity == 0)
834                 return;
835
836         start_workset = get_block_info(block)->start_workset;
837
838         /* process all pred blocks */
839         for (i = 0; i < arity; ++i) {
840                 ir_node   *pred = get_Block_cfgpred_block(block, i);
841                 workset_t *pred_end_workset = get_block_info(pred)->end_workset;
842                 ir_node   *node;
843
844                 DB((dbg, DBG_FIX, "  Pred %+F\n", pred));
845
846                 /* spill all values not used anymore */
847                 workset_foreach(pred_end_workset, node, iter) {
848                         ir_node *n2;
849                         int      iter2;
850                         bool     found = false;
851                         workset_foreach(start_workset, n2, iter2) {
852                                 if (n2 == node) {
853                                         found = true;
854                                         break;
855                                 }
856                                 /* note that we do not look at phi inputs, becuase the values
857                                  * will be either live-end and need no spill or
858                                  * they have other users in which must be somewhere else in the
859                                  * workset */
860                         }
861
862                         if (found)
863                                 continue;
864
865                         if (move_spills && be_is_live_in(lv, block, node)
866                                         && !pred_end_workset->vals[iter].spilled) {
867                                 ir_node *insert_point;
868                                 if (arity > 1) {
869                                         insert_point = be_get_end_of_block_insertion_point(pred);
870                                         insert_point = sched_prev(insert_point);
871                                 } else {
872                                         insert_point = block;
873                                 }
874                                 DB((dbg, DBG_SPILL, "Spill %+F after %+F\n", node,
875                                      insert_point));
876                                 be_add_spill(senv, node, insert_point);
877                         }
878                 }
879
880                 /* reload missing values in predecessors, add missing spills */
881                 workset_foreach(start_workset, node, iter) {
882                         const loc_t *l    = &start_workset->vals[iter];
883                         const loc_t *pred_loc;
884
885                         /* if node is a phi of the current block we reload
886                          * the corresponding argument, else node itself */
887                         if (is_Phi(node) && get_nodes_block(node) == block) {
888                                 node = get_irn_n(node, i);
889                                 assert(!l->spilled);
890
891                                 /* we might have unknowns as argument for the phi */
892                                 if (!arch_irn_consider_in_reg_alloc(cls, node))
893                                         continue;
894                         }
895
896                         /* check if node is in a register at end of pred */
897                         pred_loc = workset_contains(pred_end_workset, node);
898                         if (pred_loc != NULL) {
899                                 /* we might have to spill value on this path */
900                                 if (move_spills && !pred_loc->spilled && l->spilled) {
901                                         ir_node *insert_point
902                                                 = be_get_end_of_block_insertion_point(pred);
903                                         insert_point = sched_prev(insert_point);
904                                         DB((dbg, DBG_SPILL, "Spill %+F after %+F\n", node,
905                                             insert_point));
906                                         be_add_spill(senv, node, insert_point);
907                                 }
908                         } else {
909                                 /* node is not in register at the end of pred -> reload it */
910                                 DB((dbg, DBG_FIX, "    reload %+F\n", node));
911                                 DB((dbg, DBG_SPILL, "Reload %+F before %+F,%d\n", node, block, i));
912                                 be_add_reload_on_edge(senv, node, block, i, cls, 1);
913                         }
914                 }
915         }
916 }
917
918 static void be_spill_belady(ir_graph *irg, const arch_register_class_t *rcls)
919 {
920         int i;
921
922         be_liveness_assure_sets(be_assure_liveness(irg));
923
924         stat_ev_tim_push();
925         /* construct control flow loop tree */
926         if (! (get_irg_loopinfo_state(irg) & loopinfo_cf_consistent)) {
927                 construct_cf_backedges(irg);
928         }
929         stat_ev_tim_pop("belady_time_backedges");
930
931         stat_ev_tim_push();
932         be_clear_links(irg);
933         stat_ev_tim_pop("belady_time_clear_links");
934
935         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
936
937         /* init belady env */
938         stat_ev_tim_push();
939         obstack_init(&obst);
940         cls       = rcls;
941         lv        = be_get_irg_liveness(irg);
942         n_regs    = cls->n_regs - be_put_ignore_regs(irg, cls, NULL);
943         ws        = new_workset();
944         uses      = be_begin_uses(irg, lv);
945         loop_ana  = be_new_loop_pressure(irg, cls);
946         senv      = be_new_spill_env(irg);
947         blocklist = be_get_cfgpostorder(irg);
948         stat_ev_tim_pop("belady_time_init");
949
950         stat_ev_tim_push();
951         /* walk blocks in reverse postorder */
952         for (i = ARR_LEN(blocklist) - 1; i >= 0; --i) {
953                 process_block(blocklist[i]);
954         }
955         DEL_ARR_F(blocklist);
956         stat_ev_tim_pop("belady_time_belady");
957
958         stat_ev_tim_push();
959         /* belady was block-local, fix the global flow by adding reloads on the
960          * edges */
961         irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);
962         stat_ev_tim_pop("belady_time_fix_borders");
963
964         ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
965
966         /* Insert spill/reload nodes into the graph and fix usages */
967         be_insert_spills_reloads(senv);
968
969         /* clean up */
970         be_delete_spill_env(senv);
971         be_end_uses(uses);
972         be_free_loop_pressure(loop_ana);
973         obstack_free(&obst, NULL);
974 }
975
976 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady);
977 void be_init_spillbelady(void)
978 {
979         static be_spiller_t belady_spiller = {
980                 be_spill_belady
981         };
982         lc_opt_entry_t *be_grp       = lc_opt_get_grp(firm_opt_get_root(), "be");
983         lc_opt_entry_t *belady_group = lc_opt_get_grp(be_grp, "belady");
984         lc_opt_add_table(belady_group, options);
985
986         be_register_spiller("belady", &belady_spiller);
987         FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady");
988 }