becopyopt: Remove the unnecessary attribute name from struct copy_opt_t.
[libfirm] / ir / be / bespillbelady.c
1 /*
2  * This file is part of libFirm.
3  * Copyright (C) 2012 University of Karlsruhe.
4  */
5
6 /**
7  * @file
8  * @brief       Beladys spillalgorithm.
9  * @author      Daniel Grund, Matthias Braun
10  * @date        20.09.2005
11  */
12 #include "config.h"
13
14 #include <stdbool.h>
15
16 #include "obst.h"
17 #include "irgraph.h"
18 #include "irnode.h"
19 #include "irmode.h"
20 #include "irgwalk.h"
21 #include "irloop.h"
22 #include "iredges_t.h"
23 #include "ircons_t.h"
24 #include "irprintf.h"
25 #include "irnodeset.h"
26 #include "irtools.h"
27 #include "statev_t.h"
28 #include "util.h"
29
30 #include "beutil.h"
31 #include "bearch.h"
32 #include "beuses.h"
33 #include "besched.h"
34 #include "beirgmod.h"
35 #include "belive_t.h"
36 #include "benode.h"
37 #include "bechordal_t.h"
38 #include "bespill.h"
39 #include "beloopana.h"
40 #include "beirg.h"
41 #include "bespillutil.h"
42 #include "bemodule.h"
43
44 #define DBG_SPILL     1
45 #define DBG_WSETS     2
46 #define DBG_FIX       4
47 #define DBG_DECIDE    8
48 #define DBG_START    16
49 #define DBG_SLOTS    32
50 #define DBG_TRACE    64
51 #define DBG_WORKSET 128
52 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
53
54 #define TIME_UNDEFINED 6666
55
56 /**
57  * An association between a node and a point in time.
58  */
59 typedef struct loc_t {
60         ir_node          *node;
61         unsigned          time;     /**< A use time (see beuses.h). */
62         bool              spilled;  /**< value was already spilled on this path */
63 } loc_t;
64
65 typedef struct workset_t {
66         unsigned len;     /**< current length */
67         loc_t    vals[];  /**< array of the values/distances in this working set */
68 } workset_t;
69
70 static struct obstack               obst;
71 static const arch_register_class_t *cls;
72 static const be_lv_t               *lv;
73 static be_loopana_t                *loop_ana;
74 static unsigned                     n_regs;
75 static workset_t                   *ws;     /**< the main workset used while
76                                                      processing a block. */
77 static be_uses_t                   *uses;   /**< env for the next-use magic */
78 static spill_env_t                 *senv;   /**< see bespill.h */
79 static ir_node                    **blocklist;
80
81 static int                          move_spills      = true;
82 static int                          respectloopdepth = true;
83 static int                          improve_known_preds = true;
84 /* factor to weight the different costs of reloading/rematerializing a node
85    (see bespill.h be_get_reload_costs_no_weight) */
86 static int                          remat_bonus      = 10;
87
88 static const lc_opt_table_entry_t options[] = {
89         LC_OPT_ENT_BOOL   ("movespills", "try to move spills out of loops", &move_spills),
90         LC_OPT_ENT_BOOL   ("respectloopdepth", "outermost loop cutting", &respectloopdepth),
91         LC_OPT_ENT_BOOL   ("improveknownpreds", "known preds cutting", &improve_known_preds),
92         LC_OPT_ENT_INT    ("rematbonus", "give bonus to rematerialisable nodes", &remat_bonus),
93         LC_OPT_LAST
94 };
95
96 /**
97  * Alloc a new workset on obstack @p ob with maximum size @p max
98  */
99 static workset_t *new_workset(void)
100 {
101         return OALLOCFZ(&obst, workset_t, vals, n_regs);
102 }
103
104 /**
105  * Alloc a new instance on obstack and make it equal to @param workset
106  */
107 static workset_t *workset_clone(workset_t *workset)
108 {
109         workset_t *res = OALLOCF(&obst, workset_t, vals, n_regs);
110         memcpy(res, workset, sizeof(*res) + n_regs * sizeof(res->vals[0]));
111         return res;
112 }
113
114 /**
115  * Copy workset @param src to @param tgt
116  */
117 static void workset_copy(workset_t *dest, const workset_t *src)
118 {
119         size_t size = sizeof(*src) + n_regs * sizeof(src->vals[0]);
120         memcpy(dest, src, size);
121 }
122
123 /**
124  * Overwrites the current content array of @param ws with the
125  * @param count locations given at memory @param locs.
126  * Set the length of @param ws to count.
127  */
128 static void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs)
129 {
130         workset->len = count;
131         memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
132 }
133
134 /**
135  * Inserts the value @p val into the workset, iff it is not
136  * already contained. The workset must not be full.
137  */
138 static void workset_insert(workset_t *workset, ir_node *val, bool spilled)
139 {
140         loc_t    *loc;
141         unsigned  i;
142         /* check for current regclass */
143         assert(arch_irn_consider_in_reg_alloc(cls, val));
144
145         /* check if val is already contained */
146         for (i = 0; i < workset->len; ++i) {
147                 loc = &workset->vals[i];
148                 if (loc->node == val) {
149                         if (spilled) {
150                                 loc->spilled = true;
151                         }
152                         return;
153                 }
154         }
155
156         /* insert val */
157         assert(workset->len < n_regs && "Workset already full!");
158         loc           = &workset->vals[workset->len];
159         loc->node     = val;
160         loc->spilled  = spilled;
161         loc->time     = TIME_UNDEFINED;
162         workset->len++;
163 }
164
165 /**
166  * Removes all entries from this workset
167  */
168 static void workset_clear(workset_t *workset)
169 {
170         workset->len = 0;
171 }
172
173 /**
174  * Removes the value @p val from the workset if present.
175  */
176 static void workset_remove(workset_t *workset, ir_node *val)
177 {
178         unsigned i;
179         for (i = 0; i < workset->len; ++i) {
180                 if (workset->vals[i].node == val) {
181                         workset->vals[i] = workset->vals[--workset->len];
182                         return;
183                 }
184         }
185 }
186
187 static const loc_t *workset_contains(const workset_t *ws, const ir_node *val)
188 {
189         unsigned i;
190         for (i = 0; i < ws->len; ++i) {
191                 if (ws->vals[i].node == val)
192                         return &ws->vals[i];
193         }
194
195         return NULL;
196 }
197
198 static int loc_compare(const void *a, const void *b)
199 {
200         const loc_t   *p  = ((const loc_t*) a);
201         const loc_t   *q  = ((const loc_t*) b);
202         const unsigned pt = p->time;
203         const unsigned qt = q->time;
204
205         if (pt < qt)
206                 return -1;
207         if (pt > qt)
208                 return 1;
209
210         return get_irn_node_nr(p->node) - get_irn_node_nr(q->node);
211 }
212
213 static void workset_sort(workset_t *workset)
214 {
215         qsort(workset->vals, workset->len, sizeof(workset->vals[0]), loc_compare);
216 }
217
218 static inline unsigned workset_get_time(const workset_t *workset, unsigned idx)
219 {
220         return workset->vals[idx].time;
221 }
222
223 static inline void workset_set_time(workset_t *workset, unsigned idx,
224                                     unsigned time)
225 {
226         workset->vals[idx].time = time;
227 }
228
229 static inline unsigned workset_get_length(const workset_t *workset)
230 {
231         return workset->len;
232 }
233
234 static inline void workset_set_length(workset_t *workset, unsigned len)
235 {
236         workset->len = len;
237 }
238
239 static inline ir_node *workset_get_val(const workset_t *workset, unsigned idx)
240 {
241         return workset->vals[idx].node;
242 }
243
/**
 * Iterates over all values in a working set.
 *
 * After the loop has run to completion, @p v is NULL and @p i equals the
 * length of the workset.
 *
 * All macro arguments are parenthesized, so arbitrary expressions (for
 * example the address of a workset object) may be passed.
 *
 * @p ws The workset to iterate (evaluated several times per iteration)
 * @p v  A variable to put the current value in
 * @p i  An unsigned variable holding the index of the current entry
 */
#define workset_foreach(ws, v, i) \
	for ((i) = 0; \
	     (v) = ((i) < (ws)->len) ? (ws)->vals[i].node : NULL, (i) < (ws)->len; \
	     ++(i))
252
253 typedef struct block_info_t {
254         workset_t *start_workset;
255         workset_t *end_workset;
256 } block_info_t;
257
258 static block_info_t *new_block_info(void)
259 {
260         return OALLOCZ(&obst, block_info_t);
261 }
262
263 static inline block_info_t *get_block_info(const ir_node *block)
264 {
265         return (block_info_t*)get_irn_link(block);
266 }
267
268 static inline void set_block_info(ir_node *block, block_info_t *info)
269 {
270         set_irn_link(block, info);
271 }
272
273 /**
274  * @return The distance to the next use or 0 if irn has dont_spill flag set
275  */
276 static unsigned get_distance(ir_node *from, const ir_node *def, int skip_from_uses)
277 {
278         be_next_use_t use;
279         unsigned      costs;
280         unsigned      time;
281
282         assert(!arch_irn_is_ignore(def));
283
284         use  = be_get_next_use(uses, from, def, skip_from_uses);
285         time = use.time;
286         if (USES_IS_INFINITE(time))
287                 return USES_INFINITY;
288
289         /* We have to keep nonspillable nodes in the workingset */
290         if (arch_get_irn_flags(skip_Proj_const(def)) & arch_irn_flags_dont_spill)
291                 return 0;
292
293         /* give some bonus to rematerialisable nodes */
294         if (remat_bonus > 0) {
295                 costs = be_get_reload_costs_no_weight(senv, def, use.before);
296                 assert(costs * remat_bonus < 1000);
297                 time  += 1000 - (costs * remat_bonus);
298         }
299
300         return time;
301 }
302
303 /**
304  * Performs the actions necessary to grant the request that:
305  * - new_vals can be held in registers
306  * - as few as possible other values are disposed
307  * - the worst values get disposed
308  *
309  * @p is_usage indicates that the values in new_vals are used (not defined)
310  * In this case reloads must be performed
311  */
312 static void displace(workset_t *const new_vals, int const is_usage, ir_node *const instr)
313 {
314         ir_node **to_insert = ALLOCAN(ir_node*, n_regs);
315         bool     *spilled   = ALLOCAN(bool,     n_regs);
316         ir_node  *val;
317         int       i;
318         int       len;
319         int       spills_needed;
320         int       demand;
321         unsigned  iter;
322
323         /* 1. Identify the number of needed slots and the values to reload */
324         demand = 0;
325         workset_foreach(new_vals, val, iter) {
326                 bool reloaded = false;
327
328                 if (! workset_contains(ws, val)) {
329                         DB((dbg, DBG_DECIDE, "    insert %+F\n", val));
330                         if (is_usage) {
331                                 DB((dbg, DBG_SPILL, "Reload %+F before %+F\n", val, instr));
332                                 be_add_reload(senv, val, instr, cls, 1);
333                                 reloaded = true;
334                         }
335                 } else {
336                         DB((dbg, DBG_DECIDE, "    %+F already in workset\n", val));
337                         assert(is_usage);
338                         /* remove the value from the current workset so it is not accidently
339                          * spilled */
340                         workset_remove(ws, val);
341                 }
342                 spilled[demand]   = reloaded;
343                 to_insert[demand] = val;
344                 ++demand;
345         }
346
347         /* 2. Make room for at least 'demand' slots */
348         len           = workset_get_length(ws);
349         spills_needed = len + demand - n_regs;
350         assert(spills_needed <= len);
351
352         /* Only make more free room if we do not have enough */
353         if (spills_needed > 0) {
354                 DB((dbg, DBG_DECIDE, "    disposing %d values\n", spills_needed));
355
356                 /* calculate current next-use distance for live values */
357                 for (i = 0; i < len; ++i) {
358                         ir_node  *val  = workset_get_val(ws, i);
359                         unsigned  dist = get_distance(instr, val, !is_usage);
360                         workset_set_time(ws, i, dist);
361                 }
362
363                 /* sort entries by increasing nextuse-distance*/
364                 workset_sort(ws);
365
366                 for (i = len - spills_needed; i < len; ++i) {
367                         ir_node *val = ws->vals[i].node;
368
369                         DB((dbg, DBG_DECIDE, "    disposing node %+F (%u)\n", val,
370                              workset_get_time(ws, i)));
371
372                         if (move_spills) {
373                                 if (!USES_IS_INFINITE(ws->vals[i].time)
374                                                 && !ws->vals[i].spilled) {
375                                         ir_node *after_pos = sched_prev(instr);
376                                         DB((dbg, DBG_DECIDE, "Spill %+F after node %+F\n", val,
377                                                 after_pos));
378                                         be_add_spill(senv, val, after_pos);
379                                 }
380                         }
381                 }
382
383                 /* kill the last 'demand' entries in the array */
384                 workset_set_length(ws, len - spills_needed);
385         }
386
387         /* 3. Insert the new values into the workset */
388         for (i = 0; i < demand; ++i) {
389                 ir_node *val = to_insert[i];
390
391                 workset_insert(ws, val, spilled[i]);
392         }
393 }
394
/** Result of checking a value against the end worksets of the predecessors. */
enum {
	AVAILABLE_EVERYWHERE = 0,  /**< found in every predecessor workset */
	AVAILABLE_NOWHERE    = 1,  /**< found in no predecessor workset */
	AVAILABLE_PARTLY     = 2,  /**< found in some, but not all */
	AVAILABLE_UNKNOWN    = 3   /**< used when not all predecessors are known */
};
401
402 static unsigned available_in_all_preds(workset_t* const* pred_worksets,
403                                        size_t n_pred_worksets,
404                                        const ir_node *value, bool is_local_phi)
405 {
406         size_t i;
407         bool   avail_everywhere = true;
408         bool   avail_nowhere    = true;
409
410         assert(n_pred_worksets > 0);
411
412         /* value available in all preds? */
413         for (i = 0; i < n_pred_worksets; ++i) {
414                 bool             found     = false;
415                 const workset_t *p_workset = pred_worksets[i];
416                 int              p_len     = workset_get_length(p_workset);
417                 int              p_i;
418                 const ir_node   *l_value;
419
420                 if (is_local_phi) {
421                         assert(is_Phi(value));
422                         l_value = get_irn_n(value, i);
423                 } else {
424                         l_value = value;
425                 }
426
427                 for (p_i = 0; p_i < p_len; ++p_i) {
428                         const loc_t *p_l = &p_workset->vals[p_i];
429                         if (p_l->node != l_value)
430                                 continue;
431
432                         found = true;
433                         break;
434                 }
435
436                 if (found) {
437                         avail_nowhere = false;
438                 } else {
439                         avail_everywhere = false;
440                 }
441         }
442
443         if (avail_everywhere) {
444                 assert(!avail_nowhere);
445                 return AVAILABLE_EVERYWHERE;
446         } else if (avail_nowhere) {
447                 return AVAILABLE_NOWHERE;
448         } else {
449                 return AVAILABLE_PARTLY;
450         }
451 }
452
453 /** Decides whether a specific node should be in the start workset or not
454  *
455  * @param env      belady environment
456  * @param first
457  * @param node     the node to test
458  * @param loop     the loop of the node
459  */
460 static loc_t to_take_or_not_to_take(ir_node* first, ir_node *node,
461                                     ir_loop *loop, unsigned available)
462 {
463         be_next_use_t next_use;
464         loc_t         loc;
465
466         loc.time    = USES_INFINITY;
467         loc.node    = node;
468         loc.spilled = false;
469
470         /* We have to keep nonspillable nodes in the workingset */
471         if (arch_get_irn_flags(skip_Proj_const(node)) & arch_irn_flags_dont_spill) {
472                 loc.time = 0;
473                 DB((dbg, DBG_START, "    %+F taken (dontspill node)\n", node, loc.time));
474                 return loc;
475         }
476
477         next_use = be_get_next_use(uses, first, node, 0);
478         if (USES_IS_INFINITE(next_use.time)) {
479                 /* the nodes marked as live in shouldn't be dead, so it must be a phi */
480                 assert(is_Phi(node));
481                 loc.time = USES_INFINITY;
482                 DB((dbg, DBG_START, "    %+F not taken (dead)\n", node));
483                 return loc;
484         }
485
486         loc.time = next_use.time;
487
488         if (improve_known_preds) {
489                 if (available == AVAILABLE_EVERYWHERE) {
490                         DB((dbg, DBG_START, "    %+F taken (%u, live in all preds)\n",
491                             node, loc.time));
492                         return loc;
493                 } else if (available == AVAILABLE_NOWHERE) {
494                         DB((dbg, DBG_START, "    %+F not taken (%u, live in no pred)\n",
495                             node, loc.time));
496                         loc.time = USES_INFINITY;
497                         return loc;
498                 }
499         }
500
501         if (!respectloopdepth || next_use.outermost_loop >= get_loop_depth(loop)) {
502                 DB((dbg, DBG_START, "    %+F taken (%u, loop %d)\n", node, loc.time,
503                     next_use.outermost_loop));
504         } else {
505                 loc.time = USES_PENDING;
506                 DB((dbg, DBG_START, "    %+F delayed (outerdepth %d < loopdepth %d)\n",
507                     node, next_use.outermost_loop, get_loop_depth(loop)));
508         }
509
510         return loc;
511 }
512
513 /**
514  * Computes the start-workset for a block with multiple predecessors. We assume
515  * that at least 1 of the predeccesors is a back-edge which means we're at the
516  * beginning of a loop. We try to reload as much values as possible now so they
517  * don't get reloaded inside the loop.
518  */
519 static void decide_start_workset(ir_node *const block)
520 {
521         ir_loop    *loop = get_irn_loop(block);
522         ir_node    *first;
523         loc_t       loc;
524         loc_t      *starters;
525         loc_t      *delayed;
526         unsigned    len;
527         unsigned    i;
528         unsigned    ws_count;
529         int         free_slots, free_pressure_slots;
530         unsigned    pressure;
531         int         arity;
532         workset_t **pred_worksets;
533         bool        all_preds_known;
534
535         /* check predecessors */
536         arity           = get_irn_arity(block);
537         pred_worksets   = ALLOCAN(workset_t*, arity);
538         all_preds_known = true;
539         for (int in = 0; in < arity; ++in) {
540                 ir_node      *pred_block = get_Block_cfgpred_block(block, in);
541                 block_info_t *pred_info  = get_block_info(pred_block);
542
543                 if (pred_info == NULL) {
544                         pred_worksets[in] = NULL;
545                         all_preds_known   = false;
546                 } else {
547                         pred_worksets[in] = pred_info->end_workset;
548                 }
549         }
550
551         /* Collect all values living at start of block */
552         starters = NEW_ARR_F(loc_t, 0);
553         delayed  = NEW_ARR_F(loc_t, 0);
554
555         DB((dbg, DBG_START, "Living at start of %+F:\n", block));
556         first = sched_first(block);
557
558         /* check all Phis first */
559         sched_foreach(block, node) {
560                 unsigned available;
561
562                 if (! is_Phi(node))
563                         break;
564                 if (!arch_irn_consider_in_reg_alloc(cls, node))
565                         continue;
566
567                 if (all_preds_known) {
568                         available = available_in_all_preds(pred_worksets, arity, node, true);
569                 } else {
570                         available = AVAILABLE_UNKNOWN;
571                 }
572
573                 loc = to_take_or_not_to_take(first, node, loop, available);
574
575                 if (! USES_IS_INFINITE(loc.time)) {
576                         if (USES_IS_PENDING(loc.time))
577                                 ARR_APP1(loc_t, delayed, loc);
578                         else
579                                 ARR_APP1(loc_t, starters, loc);
580                 } else {
581                         be_spill_phi(senv, node);
582                 }
583         }
584
585         /* check all Live-Ins */
586         be_lv_foreach_cls(lv, block, be_lv_state_in, cls, node) {
587                 unsigned available;
588                 if (all_preds_known) {
589                         available = available_in_all_preds(pred_worksets, arity, node, false);
590                 } else {
591                         available = AVAILABLE_UNKNOWN;
592                 }
593
594                 loc = to_take_or_not_to_take(first, node, loop, available);
595
596                 if (! USES_IS_INFINITE(loc.time)) {
597                         if (USES_IS_PENDING(loc.time))
598                                 ARR_APP1(loc_t, delayed, loc);
599                         else
600                                 ARR_APP1(loc_t, starters, loc);
601                 }
602         }
603
604         pressure            = be_get_loop_pressure(loop_ana, cls, loop);
605         assert(ARR_LEN(delayed) <= pressure);
606         free_slots          = n_regs - ARR_LEN(starters);
607         free_pressure_slots = n_regs - (pressure - ARR_LEN(delayed));
608         free_slots          = MIN(free_slots, free_pressure_slots);
609
610         /* so far we only put nodes into the starters list that are used inside
611          * the loop. If register pressure in the loop is low then we can take some
612          * values and let them live through the loop */
613         DB((dbg, DBG_START, "Loop pressure %d, taking %d delayed vals\n",
614             pressure, free_slots));
615         if (free_slots > 0) {
616                 size_t i;
617
618                 qsort(delayed, ARR_LEN(delayed), sizeof(delayed[0]), loc_compare);
619
620                 for (i = 0; i < ARR_LEN(delayed) && free_slots > 0; ++i) {
621                         int    p, arity;
622                         loc_t *loc = & delayed[i];
623
624                         if (!is_Phi(loc->node)) {
625                                 /* don't use values which are dead in a known predecessors
626                                  * to not induce unnecessary reloads */
627                                 arity = get_irn_arity(block);
628                                 for (p = 0; p < arity; ++p) {
629                                         ir_node      *pred_block = get_Block_cfgpred_block(block, p);
630                                         block_info_t *pred_info  = get_block_info(pred_block);
631
632                                         if (pred_info == NULL)
633                                                 continue;
634
635                                         if (!workset_contains(pred_info->end_workset, loc->node)) {
636                                                 DB((dbg, DBG_START,
637                                                         "    delayed %+F not live at pred %+F\n", loc->node,
638                                                         pred_block));
639                                                 goto skip_delayed;
640                                         }
641                                 }
642                         }
643
644                         DB((dbg, DBG_START, "    delayed %+F taken\n", loc->node));
645                         ARR_APP1(loc_t, starters, *loc);
646                         loc->node = NULL;
647                         --free_slots;
648                 skip_delayed:
649                         ;
650                 }
651         }
652
653         /* spill phis (the actual phis not just their values) that are in this block
654          * but not in the start workset */
655         len = ARR_LEN(delayed);
656         for (i = 0; i < len; ++i) {
657                 ir_node *node = delayed[i].node;
658                 if (node == NULL || !is_Phi(node) || get_nodes_block(node) != block)
659                         continue;
660
661                 DB((dbg, DBG_START, "    spilling delayed phi %+F\n", node));
662                 be_spill_phi(senv, node);
663         }
664         DEL_ARR_F(delayed);
665
666         /* Sort start values by first use */
667         qsort(starters, ARR_LEN(starters), sizeof(starters[0]), loc_compare);
668
669         /* Copy the best ones from starters to start workset */
670         ws_count = MIN((unsigned) ARR_LEN(starters), n_regs);
671         workset_clear(ws);
672         workset_bulk_fill(ws, ws_count, starters);
673
674         /* spill phis (the actual phis not just their values) that are in this block
675          * but not in the start workset */
676         len = ARR_LEN(starters);
677         for (i = ws_count; i < len; ++i) {
678                 ir_node *node = starters[i].node;
679                 if (! is_Phi(node) || get_nodes_block(node) != block)
680                         continue;
681
682                 DB((dbg, DBG_START, "    spilling phi %+F\n", node));
683                 be_spill_phi(senv, node);
684         }
685
686         DEL_ARR_F(starters);
687
688         /* determine spill status of the values: If there's 1 pred block (which
689          * is no backedge) where the value is spilled then we must set it to
690          * spilled here. */
691         for (i = 0; i < ws_count; ++i) {
692                 loc_t   *loc     = &ws->vals[i];
693                 ir_node *value   = loc->node;
694                 bool     spilled;
695                 int      n;
696
697                 /* phis from this block aren't spilled */
698                 if (get_nodes_block(value) == block) {
699                         assert(is_Phi(value));
700                         loc->spilled = false;
701                         continue;
702                 }
703
704                 /* determine if value was spilled on any predecessor */
705                 spilled = false;
706                 for (n = 0; n < arity; ++n) {
707                         workset_t *pred_workset = pred_worksets[n];
708                         int        p_len;
709                         int        p;
710
711                         if (pred_workset == NULL)
712                                 continue;
713
714                         p_len = workset_get_length(pred_workset);
715                         for (p = 0; p < p_len; ++p) {
716                                 loc_t *l = &pred_workset->vals[p];
717
718                                 if (l->node != value)
719                                         continue;
720
721                                 if (l->spilled) {
722                                         spilled = true;
723                                 }
724                                 break;
725                         }
726                 }
727
728                 loc->spilled = spilled;
729         }
730 }
731
732 /**
733  * For the given block @p block, decide for each values
734  * whether it is used from a register or is reloaded
735  * before the use.
736  */
737 static void process_block(ir_node *block)
738 {
739         workset_t    *new_vals;
740         unsigned      iter;
741         block_info_t *block_info;
742         int           arity;
743
744         /* no need to process a block twice */
745         assert(get_block_info(block) == NULL);
746
747         /* construct start workset */
748         arity = get_Block_n_cfgpreds(block);
749         if (arity == 0) {
750                 /* no predecessor -> empty set */
751                 workset_clear(ws);
752         } else if (arity == 1) {
753                 /* one predecessor, copy its end workset */
754                 ir_node      *pred_block = get_Block_cfgpred_block(block, 0);
755                 block_info_t *pred_info  = get_block_info(pred_block);
756
757                 assert(pred_info != NULL);
758                 workset_copy(ws, pred_info->end_workset);
759         } else {
760                 /* multiple predecessors, do more advanced magic :) */
761                 decide_start_workset(block);
762         }
763
764         DB((dbg, DBG_DECIDE, "\n"));
765         DB((dbg, DBG_DECIDE, "Decide for %+F\n", block));
766
767         block_info = new_block_info();
768         set_block_info(block, block_info);
769
770         DB((dbg, DBG_WSETS, "Start workset for %+F:\n", block));
771         {
772                 ir_node *irn;
773                 workset_foreach(ws, irn, iter) {
774                         DB((dbg, DBG_WSETS, "  %+F (%u)\n", irn, workset_get_time(ws, iter)));
775                 }
776         }
777
778         block_info->start_workset = workset_clone(ws);
779
780         /* process the block from start to end */
781         DB((dbg, DBG_WSETS, "Processing...\n"));
782         /* TODO: this leaks (into the obstack)... */
783         new_vals = new_workset();
784
785         sched_foreach(block, irn) {
786                 assert(workset_get_length(ws) <= n_regs);
787
788                 /* Phis are no real instr (see insert_starters()) */
789                 if (is_Phi(irn)) {
790                         continue;
791                 }
792                 DB((dbg, DBG_DECIDE, "  ...%+F\n", irn));
793
794                 /* allocate all values _used_ by this instruction */
795                 workset_clear(new_vals);
796                 be_foreach_use(irn, cls, in_req_, in, in_req,
797                         /* (note that "spilled" is irrelevant here) */
798                         workset_insert(new_vals, in, false);
799                 );
800                 displace(new_vals, 1, irn);
801
802                 /* allocate all values _defined_ by this instruction */
803                 workset_clear(new_vals);
804                 be_foreach_definition(irn, cls, value, req,
805                         assert(req->width == 1);
806                         workset_insert(new_vals, value, false);
807                 );
808                 displace(new_vals, 0, irn);
809         }
810
811         /* Remember end-workset for this block */
812         block_info->end_workset = workset_clone(ws);
813         DB((dbg, DBG_WSETS, "End workset for %+F:\n", block));
814         {
815                 ir_node *irn;
816                 workset_foreach(ws, irn, iter)
817                         DB((dbg, DBG_WSETS, "  %+F (%u)\n", irn, workset_get_time(ws, iter)));
818         }
819 }
820
821 /**
822  * 'decide' is block-local and makes assumptions
823  * about the set of live-ins. Thus we must adapt the
824  * live-outs to the live-ins at each block-border.
825  */
826 static void fix_block_borders(ir_node *block, void *data)
827 {
828         workset_t *start_workset;
829         int        arity;
830         int        i;
831         unsigned   iter;
832         (void) data;
833
834         DB((dbg, DBG_FIX, "\n"));
835         DB((dbg, DBG_FIX, "Fixing %+F\n", block));
836
837         arity = get_irn_arity(block);
838         /* can happen for endless loops */
839         if (arity == 0)
840                 return;
841
842         start_workset = get_block_info(block)->start_workset;
843
844         /* process all pred blocks */
845         for (i = 0; i < arity; ++i) {
846                 ir_node   *pred = get_Block_cfgpred_block(block, i);
847                 workset_t *pred_end_workset = get_block_info(pred)->end_workset;
848                 ir_node   *node;
849
850                 DB((dbg, DBG_FIX, "  Pred %+F\n", pred));
851
852                 /* spill all values not used anymore */
853                 workset_foreach(pred_end_workset, node, iter) {
854                         ir_node *n2;
855                         unsigned iter2;
856                         bool     found = false;
857                         workset_foreach(start_workset, n2, iter2) {
858                                 if (n2 == node) {
859                                         found = true;
860                                         break;
861                                 }
862                                 /* note that we do not look at phi inputs, becuase the values
863                                  * will be either live-end and need no spill or
864                                  * they have other users in which must be somewhere else in the
865                                  * workset */
866                         }
867
868                         if (found)
869                                 continue;
870
871                         if (move_spills && be_is_live_in(lv, block, node)
872                                         && !pred_end_workset->vals[iter].spilled) {
873                                 ir_node *insert_point;
874                                 if (arity > 1) {
875                                         insert_point = be_get_end_of_block_insertion_point(pred);
876                                         insert_point = sched_prev(insert_point);
877                                 } else {
878                                         insert_point = block;
879                                 }
880                                 DB((dbg, DBG_SPILL, "Spill %+F after %+F\n", node,
881                                      insert_point));
882                                 be_add_spill(senv, node, insert_point);
883                         }
884                 }
885
886                 /* reload missing values in predecessors, add missing spills */
887                 workset_foreach(start_workset, node, iter) {
888                         const loc_t *l    = &start_workset->vals[iter];
889                         const loc_t *pred_loc;
890
891                         /* if node is a phi of the current block we reload
892                          * the corresponding argument, else node itself */
893                         if (is_Phi(node) && get_nodes_block(node) == block) {
894                                 node = get_irn_n(node, i);
895                                 assert(!l->spilled);
896
897                                 /* we might have unknowns as argument for the phi */
898                                 if (!arch_irn_consider_in_reg_alloc(cls, node))
899                                         continue;
900                         }
901
902                         /* check if node is in a register at end of pred */
903                         pred_loc = workset_contains(pred_end_workset, node);
904                         if (pred_loc != NULL) {
905                                 /* we might have to spill value on this path */
906                                 if (move_spills && !pred_loc->spilled && l->spilled) {
907                                         ir_node *insert_point
908                                                 = be_get_end_of_block_insertion_point(pred);
909                                         insert_point = sched_prev(insert_point);
910                                         DB((dbg, DBG_SPILL, "Spill %+F after %+F\n", node,
911                                             insert_point));
912                                         be_add_spill(senv, node, insert_point);
913                                 }
914                         } else {
915                                 /* node is not in register at the end of pred -> reload it */
916                                 DB((dbg, DBG_FIX, "    reload %+F\n", node));
917                                 DB((dbg, DBG_SPILL, "Reload %+F before %+F,%d\n", node, block, i));
918                                 be_add_reload_on_edge(senv, node, block, i, cls, 1);
919                         }
920                 }
921         }
922 }
923
924 static void be_spill_belady(ir_graph *irg, const arch_register_class_t *rcls)
925 {
926         int i;
927
928         be_assure_live_sets(irg);
929
930         stat_ev_tim_push();
931         assure_loopinfo(irg);
932         stat_ev_tim_pop("belady_time_backedges");
933
934         stat_ev_tim_push();
935         be_clear_links(irg);
936         stat_ev_tim_pop("belady_time_clear_links");
937
938         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
939
940         /* init belady env */
941         stat_ev_tim_push();
942         obstack_init(&obst);
943         cls       = rcls;
944         lv        = be_get_irg_liveness(irg);
945         n_regs    = be_get_n_allocatable_regs(irg, cls);
946         ws        = new_workset();
947         uses      = be_begin_uses(irg, lv);
948         loop_ana  = be_new_loop_pressure(irg, cls);
949         senv      = be_new_spill_env(irg);
950         blocklist = be_get_cfgpostorder(irg);
951         stat_ev_tim_pop("belady_time_init");
952
953         stat_ev_tim_push();
954         /* walk blocks in reverse postorder */
955         for (i = ARR_LEN(blocklist) - 1; i >= 0; --i) {
956                 process_block(blocklist[i]);
957         }
958         DEL_ARR_F(blocklist);
959         stat_ev_tim_pop("belady_time_belady");
960
961         stat_ev_tim_push();
962         /* belady was block-local, fix the global flow by adding reloads on the
963          * edges */
964         irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);
965         stat_ev_tim_pop("belady_time_fix_borders");
966
967         ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
968
969         /* Insert spill/reload nodes into the graph and fix usages */
970         be_insert_spills_reloads(senv);
971
972         /* clean up */
973         be_delete_spill_env(senv);
974         be_end_uses(uses);
975         be_free_loop_pressure(loop_ana);
976         obstack_free(&obst, NULL);
977 }
978
979 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady)
980 void be_init_spillbelady(void)
981 {
982         static be_spiller_t belady_spiller = {
983                 be_spill_belady
984         };
985         lc_opt_entry_t *be_grp       = lc_opt_get_grp(firm_opt_get_root(), "be");
986         lc_opt_entry_t *belady_group = lc_opt_get_grp(be_grp, "belady");
987         lc_opt_add_table(belady_group, options);
988
989         be_register_spiller("belady", &belady_spiller);
990         FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady");
991 }