sparc: matching rules for andn and orn
[libfirm] / ir / be / bespillbelady.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
 * @brief       Belady's spill algorithm.
23  * @author      Daniel Grund, Matthias Braun
24  * @date        20.09.2005
25  * @version     $Id$
26  */
27 #include "config.h"
28
29 #include <stdbool.h>
30
31 #include "obst.h"
32 #include "irprintf_t.h"
33 #include "irgraph.h"
34 #include "irnode.h"
35 #include "irmode.h"
36 #include "irgwalk.h"
37 #include "irloop.h"
38 #include "iredges_t.h"
39 #include "ircons_t.h"
40 #include "irprintf.h"
41 #include "irnodeset.h"
42
43 #include "beutil.h"
44 #include "bearch.h"
45 #include "beuses.h"
46 #include "besched.h"
47 #include "beirgmod.h"
48 #include "belive_t.h"
49 #include "benode.h"
50 #include "bechordal_t.h"
51 #include "bespill.h"
52 #include "beloopana.h"
53 #include "beirg.h"
54 #include "bespillutil.h"
55 #include "bemodule.h"
56
/* Bit masks selecting debug output categories; they are passed as the mask
 * argument of the DB(()) calls in this file. */
#define DBG_SPILL     1
#define DBG_WSETS     2
#define DBG_FIX       4
#define DBG_DECIDE    8
#define DBG_START    16
#define DBG_SLOTS    32
#define DBG_TRACE    64
#define DBG_WORKSET 128
/* Debug module handle handed to DB(()); only present when DEBUG_ONLY keeps
 * its argument. */
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

/* Placeholder stored in loc_t::time by workset_insert() until a next-use
 * distance has been assigned with workset_set_time(). */
#define TIME_UNDEFINED 6666
68
/**
 * An association between a node and a point in time.
 *
 * This is the element type of a workset (see workset_t) and of the
 * starters/delayed candidate arrays built in decide_start_workset().
 */
typedef struct loc_t {
        ir_node          *node;     /**< The value this entry describes. */
        unsigned          time;     /**< A use time (see beuses.h). */
        bool              spilled;  /**< value was already spilled on this path */
} loc_t;
77
78 typedef struct _workset_t {
79         unsigned len;     /**< current length */
80         loc_t    vals[0]; /**< array of the values/distances in this working set */
81 } workset_t;
82
/* File-global state of the spiller. */
static struct obstack               obst;   /**< obstack the worksets and block
                                                     infos are allocated on */
static const arch_register_class_t *cls;    /**< register class being processed;
                                                     used to filter values via
                                                     arch_irn_consider_in_reg_alloc() */
static const be_lv_t               *lv;     /**< liveness information, queried for
                                                     the live-ins of a block */
static be_loopana_t                *loop_ana; /**< queried for the loop pressure in
                                                       decide_start_workset() */
static unsigned                     n_regs; /**< capacity of every workset */
static workset_t                   *ws;     /**< the main workset used while
                                                     processing a block. */
static be_uses_t                   *uses;   /**< env for the next-use magic */
static ir_node                     *instr;  /**< current instruction */
static unsigned                     instr_nr; /**< current instruction number
                                                       (relative to block start) */
static spill_env_t                 *senv;   /**< see bespill.h */
static ir_node                    **blocklist; /* NOTE(review): not used in the part
                                                  of the file visible here */

/* Tunables; registered as command line options in the options[] table. */
static int                          move_spills      = true; /* displace() only calls
                                                                be_add_spill() when set */
static int                          respectloopdepth = true; /* see to_take_or_not_to_take() */
static int                          improve_known_preds = true; /* see to_take_or_not_to_take() */
/* factor to weight the different costs of reloading/rematerializing a node
   (see bespill.h be_get_reload_costs_no_weight) */
static int                          remat_bonus      = 10;
103
/* Option table binding option names and help texts to the tunables above;
 * terminated by LC_OPT_LAST. */
static const lc_opt_table_entry_t options[] = {
        LC_OPT_ENT_BOOL   ("movespills", "try to move spills out of loops", &move_spills),
        LC_OPT_ENT_BOOL   ("respectloopdepth", "outermost loop cutting", &respectloopdepth),
        LC_OPT_ENT_BOOL   ("improveknownpreds", "known preds cutting", &improve_known_preds),
        LC_OPT_ENT_INT    ("rematbonus", "give bonus to rematerialisable nodes", &remat_bonus),
        LC_OPT_LAST
};
111
112 /**
113  * Alloc a new workset on obstack @p ob with maximum size @p max
114  */
115 static workset_t *new_workset(void)
116 {
117         return OALLOCFZ(&obst, workset_t, vals, n_regs);
118 }
119
120 /**
121  * Alloc a new instance on obstack and make it equal to @param workset
122  */
123 static workset_t *workset_clone(workset_t *workset)
124 {
125         workset_t *res = OALLOCF(&obst, workset_t, vals, n_regs);
126         memcpy(res, workset, sizeof(*res) + n_regs * sizeof(res->vals[0]));
127         return res;
128 }
129
130 /**
131  * Copy workset @param src to @param tgt
132  */
133 static void workset_copy(workset_t *dest, const workset_t *src)
134 {
135         size_t size = sizeof(*src) + n_regs * sizeof(src->vals[0]);
136         memcpy(dest, src, size);
137 }
138
139 /**
140  * Overwrites the current content array of @param ws with the
141  * @param count locations given at memory @param locs.
142  * Set the length of @param ws to count.
143  */
144 static void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs)
145 {
146         workset->len = count;
147         memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
148 }
149
150 /**
151  * Inserts the value @p val into the workset, iff it is not
152  * already contained. The workset must not be full.
153  */
154 static void workset_insert(workset_t *workset, ir_node *val, bool spilled)
155 {
156         loc_t    *loc;
157         unsigned  i;
158         /* check for current regclass */
159         assert(arch_irn_consider_in_reg_alloc(cls, val));
160
161         /* check if val is already contained */
162         for (i = 0; i < workset->len; ++i) {
163                 loc = &workset->vals[i];
164                 if (loc->node == val) {
165                         if (spilled) {
166                                 loc->spilled = true;
167                         }
168                         return;
169                 }
170         }
171
172         /* insert val */
173         assert(workset->len < n_regs && "Workset already full!");
174         loc           = &workset->vals[workset->len];
175         loc->node     = val;
176         loc->spilled  = spilled;
177         loc->time     = TIME_UNDEFINED;
178         workset->len++;
179 }
180
181 /**
182  * Removes all entries from this workset
183  */
184 static void workset_clear(workset_t *workset)
185 {
186         workset->len = 0;
187 }
188
189 /**
190  * Removes the value @p val from the workset if present.
191  */
192 static void workset_remove(workset_t *workset, ir_node *val)
193 {
194         unsigned i;
195         for (i = 0; i < workset->len; ++i) {
196                 if (workset->vals[i].node == val) {
197                         workset->vals[i] = workset->vals[--workset->len];
198                         return;
199                 }
200         }
201 }
202
203 static const loc_t *workset_contains(const workset_t *ws, const ir_node *val)
204 {
205         unsigned i;
206         for (i = 0; i < ws->len; ++i) {
207                 if (ws->vals[i].node == val)
208                         return &ws->vals[i];
209         }
210
211         return NULL;
212 }
213
214 static int loc_compare(const void *a, const void *b)
215 {
216         const loc_t *p = a;
217         const loc_t *q = b;
218         return p->time - q->time;
219 }
220
221 static void workset_sort(workset_t *workset)
222 {
223         qsort(workset->vals, workset->len, sizeof(workset->vals[0]), loc_compare);
224 }
225
226 static inline unsigned workset_get_time(const workset_t *workset, unsigned idx)
227 {
228         return workset->vals[idx].time;
229 }
230
231 static inline void workset_set_time(workset_t *workset, unsigned idx,
232                                     unsigned time)
233 {
234         workset->vals[idx].time = time;
235 }
236
237 static inline unsigned workset_get_length(const workset_t *workset)
238 {
239         return workset->len;
240 }
241
242 static inline void workset_set_length(workset_t *workset, unsigned len)
243 {
244         workset->len = len;
245 }
246
247 static inline ir_node *workset_get_val(const workset_t *workset, unsigned idx)
248 {
249         return workset->vals[idx].node;
250 }
251
/**
 * Iterates over all values in the working set.
 *
 * Every macro argument is parenthesized in the expansion, so arbitrary
 * expressions (e.g. @c &some_workset) may be passed. The arguments are
 * evaluated several times per iteration and must be free of side effects.
 * After the loop has run to completion @p v is NULL and @p i equals the
 * length of the workset.
 *
 * @p ws The workset to iterate (pointer to a workset)
 * @p v  A variable to put the current value in
 * @p i  An integer for internal use
 */
#define workset_foreach(ws, v, i)       \
        for ((i) = 0; (v) = ((i) < (ws)->len) ? (ws)->vals[(i)].node : NULL, (i) < (ws)->len; ++(i))
260
/**
 * Per-block result of the spiller. It is attached to the block through the
 * node link field (see set_block_info() / get_block_info()).
 */
typedef struct _block_info_t
{
        workset_t *start_workset; /**< clone of ws taken in process_block() before
                                       the first instruction is handled */
        workset_t *end_workset;   /**< clone of ws taken in process_block() after
                                       the last instruction was handled */
} block_info_t;
266
267 static block_info_t *new_block_info(void)
268 {
269         return OALLOCZ(&obst, block_info_t);
270 }
271
272 static inline block_info_t *get_block_info(const ir_node *block)
273 {
274         return get_irn_link(block);
275 }
276
277 static inline void set_block_info(ir_node *block, block_info_t *info)
278 {
279         set_irn_link(block, info);
280 }
281
282 /**
283  * @return The distance to the next use or 0 if irn has dont_spill flag set
284  */
285 static unsigned get_distance(ir_node *from, unsigned from_step,
286                              const ir_node *def, int skip_from_uses)
287 {
288         be_next_use_t use;
289         unsigned      costs;
290         unsigned      time;
291
292         assert(!arch_irn_is_ignore(def));
293
294         use  = be_get_next_use(uses, from, from_step, def, skip_from_uses);
295         time = use.time;
296         if (USES_IS_INFINITE(time))
297                 return USES_INFINITY;
298
299         /* We have to keep nonspillable nodes in the workingset */
300         if (arch_irn_get_flags(skip_Proj_const(def)) & arch_irn_flags_dont_spill)
301                 return 0;
302
303         /* give some bonus to rematerialisable nodes */
304         if (remat_bonus > 0) {
305                 costs = be_get_reload_costs_no_weight(senv, def, use.before);
306                 assert(costs * remat_bonus < 1000);
307                 time  += 1000 - (costs * remat_bonus);
308         }
309
310         return time;
311 }
312
/**
 * Performs the actions necessary to grant the request that:
 * - new_vals can be held in registers
 * - as few as possible other values are disposed
 * - the worst values get disposed
 *
 * Works on the global workset ws and uses the global instr / instr_nr as the
 * current position.
 *
 * @p new_vals the values that have to be in the workset afterwards
 * @p is_usage indicates that the values in new_vals are used (not defined)
 * In this case reloads must be performed
 */
static void displace(workset_t *new_vals, int is_usage)
{
        /* scratch arrays (at most n_regs values can be requested) */
        ir_node **to_insert = ALLOCAN(ir_node*, n_regs);
        bool     *spilled   = ALLOCAN(bool,     n_regs);
        ir_node  *val;
        int       i;
        int       len;
        int       spills_needed;
        int       demand;
        unsigned  iter;

        /* 1. Identify the number of needed slots and the values to reload */
        demand = 0;
        workset_foreach(new_vals, val, iter) {
                bool reloaded = false;

                if (! workset_contains(ws, val)) {
                        DB((dbg, DBG_DECIDE, "    insert %+F\n", val));
                        /* a used value that is not in the workset has to be
                         * reloaded in front of the current instruction */
                        if (is_usage) {
                                DB((dbg, DBG_SPILL, "Reload %+F before %+F\n", val, instr));
                                be_add_reload(senv, val, instr, cls, 1);
                                reloaded = true;
                        }
                } else {
                        DB((dbg, DBG_DECIDE, "    %+F already in workset\n", val));
                        assert(is_usage);
                        /* remove the value from the current workset so it is not accidently
                         * spilled */
                        workset_remove(ws, val);
                }
                /* NOTE(review): a value that was already in ws is re-inserted in
                 * step 3 with spilled == false, whatever its spilled flag was
                 * before -- confirm that this is intended */
                spilled[demand]   = reloaded;
                to_insert[demand] = val;
                ++demand;
        }

        /* 2. Make room for at least 'demand' slots */
        len           = workset_get_length(ws);
        spills_needed = len + demand - n_regs;
        assert(spills_needed <= len);

        /* Only make more free room if we do not have enough */
        if (spills_needed > 0) {
                DB((dbg, DBG_DECIDE, "    disposing %d values\n", spills_needed));

                /* calculate current next-use distance for live values */
                for (i = 0; i < len; ++i) {
                        ir_node  *val  = workset_get_val(ws, i);
                        unsigned  dist = get_distance(instr, instr_nr, val, !is_usage);
                        workset_set_time(ws, i, dist);
                }

                /* sort entries by increasing nextuse-distance*/
                workset_sort(ws);

                /* the last 'spills_needed' entries have the largest distance
                 * and are thrown out */
                for (i = len - spills_needed; i < len; ++i) {
                        ir_node *val = ws->vals[i].node;

                        DB((dbg, DBG_DECIDE, "    disposing node %+F (%u)\n", val,
                             workset_get_time(ws, i)));

                        /* a spill is only placed here for values which are used
                         * again and which are not marked as spilled already */
                        if (move_spills) {
                                if (!USES_IS_INFINITE(ws->vals[i].time)
                                                && !ws->vals[i].spilled) {
                                        ir_node *after_pos = sched_prev(instr);
                                        DB((dbg, DBG_DECIDE, "Spill %+F after node %+F\n", val,
                                                after_pos));
                                        be_add_spill(senv, val, after_pos);
                                }
                        }
                }

                /* kill the last 'spills_needed' entries in the array */
                workset_set_length(ws, len - spills_needed);
        }

        /* 3. Insert the new values into the workset */
        for (i = 0; i < demand; ++i) {
                ir_node *val = to_insert[i];

                workset_insert(ws, val, spilled[i]);
        }
}
404
/**
 * Availability of a value in the end worksets of the predecessor blocks;
 * consumed by to_take_or_not_to_take().
 */
enum {
        AVAILABLE_EVERYWHERE, /**< found in the end workset of every predecessor */
        AVAILABLE_NOWHERE,    /**< found in no predecessor end workset */
        AVAILABLE_PARTLY,     /**< found in some, but not all predecessors */
        AVAILABLE_UNKNOWN     /**< set by decide_start_workset() when not all
                                   predecessors have been processed yet */
};
411
412 static unsigned available_in_all_preds(workset_t* const* pred_worksets,
413                                        size_t n_pred_worksets,
414                                        const ir_node *value, bool is_local_phi)
415 {
416         size_t i;
417         bool   avail_everywhere = true;
418         bool   avail_nowhere    = true;
419
420         assert(n_pred_worksets > 0);
421
422         /* value available in all preds? */
423         for (i = 0; i < n_pred_worksets; ++i) {
424                 bool             found     = false;
425                 const workset_t *p_workset = pred_worksets[i];
426                 int              p_len     = workset_get_length(p_workset);
427                 int              p_i;
428                 const ir_node   *l_value;
429
430                 if (is_local_phi) {
431                         assert(is_Phi(value));
432                         l_value = get_irn_n(value, i);
433                 } else {
434                         l_value = value;
435                 }
436
437                 for (p_i = 0; p_i < p_len; ++p_i) {
438                         const loc_t *p_l = &p_workset->vals[p_i];
439                         if (p_l->node != l_value)
440                                 continue;
441
442                         found = true;
443                         break;
444                 }
445
446                 if (found) {
447                         avail_nowhere = false;
448                 } else {
449                         avail_everywhere = false;
450                 }
451         }
452
453         if (avail_everywhere) {
454                 assert(!avail_nowhere);
455                 return AVAILABLE_EVERYWHERE;
456         } else if (avail_nowhere) {
457                 return AVAILABLE_NOWHERE;
458         } else {
459                 return AVAILABLE_PARTLY;
460         }
461 }
462
463 /** Decides whether a specific node should be in the start workset or not
464  *
465  * @param env      belady environment
466  * @param first
467  * @param node     the node to test
468  * @param loop     the loop of the node
469  */
470 static loc_t to_take_or_not_to_take(ir_node* first, ir_node *node,
471                                     ir_loop *loop, unsigned available)
472 {
473         be_next_use_t next_use;
474         loc_t         loc;
475
476         loc.time    = USES_INFINITY;
477         loc.node    = node;
478         loc.spilled = false;
479
480         if (!arch_irn_consider_in_reg_alloc(cls, node)) {
481                 loc.time = USES_INFINITY;
482                 return loc;
483         }
484
485         /* We have to keep nonspillable nodes in the workingset */
486         if (arch_irn_get_flags(skip_Proj_const(node)) & arch_irn_flags_dont_spill) {
487                 loc.time = 0;
488                 DB((dbg, DBG_START, "    %+F taken (dontspill node)\n", node, loc.time));
489                 return loc;
490         }
491
492         next_use = be_get_next_use(uses, first, 0, node, 0);
493         if (USES_IS_INFINITE(next_use.time)) {
494                 /* the nodes marked as live in shouldn't be dead, so it must be a phi */
495                 assert(is_Phi(node));
496                 loc.time = USES_INFINITY;
497                 DB((dbg, DBG_START, "    %+F not taken (dead)\n", node));
498                 return loc;
499         }
500
501         loc.time = next_use.time;
502
503         if (improve_known_preds) {
504                 if (available == AVAILABLE_EVERYWHERE) {
505                         DB((dbg, DBG_START, "    %+F taken (%u, live in all preds)\n",
506                             node, loc.time));
507                         return loc;
508                 } else if (available == AVAILABLE_NOWHERE) {
509                         DB((dbg, DBG_START, "    %+F not taken (%u, live in no pred)\n",
510                             node, loc.time));
511                         loc.time = USES_INFINITY;
512                         return loc;
513                 }
514         }
515
516         if (!respectloopdepth || next_use.outermost_loop >= get_loop_depth(loop)) {
517                 DB((dbg, DBG_START, "    %+F taken (%u, loop %d)\n", node, loc.time,
518                     next_use.outermost_loop));
519         } else {
520                 loc.time = USES_PENDING;
521                 DB((dbg, DBG_START, "    %+F delayed (outerdepth %d < loopdepth %d)\n",
522                     node, next_use.outermost_loop, get_loop_depth(loop)));
523         }
524
525         return loc;
526 }
527
/**
 * Computes the start-workset for a block with multiple predecessors. We assume
 * that at least 1 of the predecessors is a back-edge which means we're at the
 * beginning of a loop. We try to reload as much values as possible now so they
 * don't get reloaded inside the loop.
 *
 * The result is written into the global workset ws. Phis of this block which
 * do not make it into the start workset are handed to be_spill_phi().
 *
 * @param block  the block whose start workset is computed
 */
static void decide_start_workset(const ir_node *block)
{
        ir_loop    *loop = get_irn_loop(block);
        ir_node    *first;
        ir_node    *node;
        loc_t       loc;
        loc_t      *starters;  /* candidates which are taken */
        loc_t      *delayed;   /* candidates with time USES_PENDING */
        unsigned    len;
        unsigned    i;
        int         in;
        unsigned    ws_count;
        int             free_slots, free_pressure_slots;
        unsigned    pressure;
        int         arity;
        workset_t **pred_worksets;
        bool        all_preds_known;

        /* check predecessors: collect their end worksets; a predecessor without
         * block info has not been processed yet */
        arity           = get_irn_arity(block);
        pred_worksets   = ALLOCAN(workset_t*, arity);
        all_preds_known = true;
        for (in = 0; in < arity; ++in) {
                ir_node      *pred_block = get_Block_cfgpred_block(block, in);
                block_info_t *pred_info  = get_block_info(pred_block);

                if (pred_info == NULL) {
                        pred_worksets[in] = NULL;
                        all_preds_known   = false;
                } else {
                        pred_worksets[in] = pred_info->end_workset;
                }
        }

        /* Collect all values living at start of block */
        starters = NEW_ARR_F(loc_t, 0);
        delayed  = NEW_ARR_F(loc_t, 0);

        DB((dbg, DBG_START, "Living at start of %+F:\n", block));
        first = sched_first(block);

        /* check all Phis first */
        sched_foreach(block, node) {
                unsigned available;

                /* stop at the first node of the schedule which is no Phi */
                if (! is_Phi(node))
                        break;
                if (!arch_irn_consider_in_reg_alloc(cls, node))
                        continue;

                if (all_preds_known) {
                        available = available_in_all_preds(pred_worksets, arity, node, true);
                } else {
                        available = AVAILABLE_UNKNOWN;
                }

                loc = to_take_or_not_to_take(first, node, loop, available);

                if (! USES_IS_INFINITE(loc.time)) {
                        if (USES_IS_PENDING(loc.time))
                                ARR_APP1(loc_t, delayed, loc);
                        else
                                ARR_APP1(loc_t, starters, loc);
                } else {
                        /* a Phi which is not taken at all is spilled as a whole */
                        be_spill_phi(senv, node);
                }
        }

        /* check all Live-Ins */
        be_lv_foreach(lv, block, be_lv_state_in, in) {
                ir_node *node = be_lv_get_irn(lv, block, in);
                unsigned available;

                if (all_preds_known) {
                        available = available_in_all_preds(pred_worksets, arity, node, false);
                } else {
                        available = AVAILABLE_UNKNOWN;
                }

                loc = to_take_or_not_to_take(first, node, loop, available);

                if (! USES_IS_INFINITE(loc.time)) {
                        if (USES_IS_PENDING(loc.time))
                                ARR_APP1(loc_t, delayed, loc);
                        else
                                ARR_APP1(loc_t, starters, loc);
                }
        }

        /* the number of delayed values we may take is limited by the free
         * places in the workset and by the register pressure of the loop */
        pressure            = be_get_loop_pressure(loop_ana, cls, loop);
        assert(ARR_LEN(delayed) <= (signed)pressure);
        free_slots          = n_regs - ARR_LEN(starters);
        free_pressure_slots = n_regs - (pressure - ARR_LEN(delayed));
        free_slots          = MIN(free_slots, free_pressure_slots);

        /* so far we only put nodes into the starters list that are used inside
         * the loop. If register pressure in the loop is low then we can take some
         * values and let them live through the loop */
        DB((dbg, DBG_START, "Loop pressure %d, taking %d delayed vals\n",
            pressure, free_slots));
        if (free_slots > 0) {
                int i;
                qsort(delayed, ARR_LEN(delayed), sizeof(delayed[0]), loc_compare);

                for (i = 0; i < ARR_LEN(delayed) && free_slots > 0; ++i) {
                        int    p, arity;
                        loc_t *loc = & delayed[i];

                        if (!is_Phi(loc->node)) {
                                /* don't use values which are dead in a known predecessors
                                 * to not induce unnecessary reloads */
                                arity = get_irn_arity(block);
                                for (p = 0; p < arity; ++p) {
                                        ir_node      *pred_block = get_Block_cfgpred_block(block, p);
                                        block_info_t *pred_info  = get_block_info(pred_block);

                                        /* predecessor not processed yet */
                                        if (pred_info == NULL)
                                                continue;

                                        if (!workset_contains(pred_info->end_workset, loc->node)) {
                                                DB((dbg, DBG_START,
                                                        "    delayed %+F not live at pred %+F\n", loc->node,
                                                        pred_block));
                                                goto skip_delayed;
                                        }
                                }
                        }

                        DB((dbg, DBG_START, "    delayed %+F taken\n", loc->node));
                        ARR_APP1(loc_t, starters, *loc);
                        /* mark the entry as consumed for the phi spill loop below */
                        loc->node = NULL;
                        --free_slots;
                skip_delayed:
                        ;
                }
        }

        /* spill phis (the actual phis not just their values) that are in this block
         * but not in the start workset */
        len = ARR_LEN(delayed);
        for (i = 0; i < len; ++i) {
                ir_node *node = delayed[i].node;
                /* NULL entries were moved to the starters above */
                if (node == NULL || !is_Phi(node) || get_nodes_block(node) != block)
                        continue;

                DB((dbg, DBG_START, "    spilling delayed phi %+F\n", node));
                be_spill_phi(senv, node);
        }
        DEL_ARR_F(delayed);

        /* Sort start values by first use */
        qsort(starters, ARR_LEN(starters), sizeof(starters[0]), loc_compare);

        /* Copy the best ones from starters to start workset */
        ws_count = MIN((unsigned) ARR_LEN(starters), n_regs);
        workset_clear(ws);
        workset_bulk_fill(ws, ws_count, starters);

        /* spill phis (the actual phis not just their values) that are in this block
         * but not in the start workset */
        len = ARR_LEN(starters);
        for (i = ws_count; i < len; ++i) {
                ir_node *node = starters[i].node;
                if (! is_Phi(node) || get_nodes_block(node) != block)
                        continue;

                DB((dbg, DBG_START, "    spilling phi %+F\n", node));
                be_spill_phi(senv, node);
        }

        DEL_ARR_F(starters);

        /* determine spill status of the values: If there's 1 pred block (which
         * is no backedge) where the value is spilled then we must set it to
         * spilled here. */
        for (i = 0; i < ws_count; ++i) {
                loc_t   *loc     = &ws->vals[i];
                ir_node *value   = loc->node;
                bool     spilled;
                int      n;

                /* phis from this block aren't spilled */
                if (get_nodes_block(value) == block) {
                        assert(is_Phi(value));
                        loc->spilled = false;
                        continue;
                }

                /* determine if value was spilled on any predecessor */
                spilled = false;
                for (n = 0; n < arity; ++n) {
                        workset_t *pred_workset = pred_worksets[n];
                        int        p_len;
                        int        p;

                        /* predecessor not processed yet */
                        if (pred_workset == NULL)
                                continue;

                        p_len = workset_get_length(pred_workset);
                        for (p = 0; p < p_len; ++p) {
                                loc_t *l = &pred_workset->vals[p];

                                if (l->node != value)
                                        continue;

                                if (l->spilled) {
                                        spilled = true;
                                }
                                break;
                        }
                }

                loc->spilled = spilled;
        }
}
749
/**
 * For the given block @p block, decide for each values
 * whether it is used from a register or is reloaded
 * before the use.
 *
 * Builds the start workset of the block in the global ws, walks over the
 * schedule of the block calling displace() for the used and for the defined
 * values of every instruction, and attaches a block_info_t holding clones of
 * the start and end workset to the block.
 *
 * @param block  the block to process; it must not have a block info yet
 */
static void process_block(ir_node *block)
{
        workset_t    *new_vals;
        ir_node      *irn;
        unsigned      iter;
        block_info_t *block_info;
        int           arity;

        /* no need to process a block twice */
        assert(get_block_info(block) == NULL);

        /* construct start workset */
        arity = get_Block_n_cfgpreds(block);
        if (arity == 0) {
                /* no predecessor -> empty set */
                workset_clear(ws);
        } else if (arity == 1) {
                /* one predecessor, copy its end workset */
                ir_node      *pred_block = get_Block_cfgpred_block(block, 0);
                block_info_t *pred_info  = get_block_info(pred_block);

                /* the single predecessor has to be processed already */
                assert(pred_info != NULL);
                workset_copy(ws, pred_info->end_workset);
        } else {
                /* multiple predecessors, do more advanced magic :) */
                decide_start_workset(block);
        }

        DB((dbg, DBG_DECIDE, "\n"));
        DB((dbg, DBG_DECIDE, "Decide for %+F\n", block));

        /* from now on get_block_info(block) is non-NULL, i.e. the block counts
         * as processed */
        block_info = new_block_info();
        set_block_info(block, block_info);

        DB((dbg, DBG_WSETS, "Start workset for %+F:\n", block));
        workset_foreach(ws, irn, iter) {
                DB((dbg, DBG_WSETS, "  %+F (%u)\n", irn,
                     workset_get_time(ws, iter)));
        }

        block_info->start_workset = workset_clone(ws);

        /* process the block from start to end */
        DB((dbg, DBG_WSETS, "Processing...\n"));
        instr_nr = 0;
        /* TODO: this leaks (into the obstack)... */
        new_vals = new_workset();

        sched_foreach(block, irn) {
                int i, arity;
                ir_node *value;
                assert(workset_get_length(ws) <= n_regs);

                /* Phis are no real instr (see insert_starters()) */
                /* NOTE(review): insert_starters() is not defined in the visible
                 * part of this file; the reference may be stale */
                if (is_Phi(irn)) {
                        continue;
                }
                DB((dbg, DBG_DECIDE, "  ...%+F\n", irn));

                /* set instruction in the workset */
                instr = irn;

                /* allocate all values _used_ by this instruction */
                workset_clear(new_vals);
                for (i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
                        ir_node *in = get_irn_n(irn, i);
                        if (!arch_irn_consider_in_reg_alloc(cls, in))
                                continue;

                        /* (note that "spilled" is irrelevant here) */
                        workset_insert(new_vals, in, false);
                }
                displace(new_vals, 1);

                /* allocate all values _defined_ by this instruction */
                workset_clear(new_vals);
                be_foreach_definition(irn, cls, value,
                        assert(req_->width == 1);
                        workset_insert(new_vals, value, false);
                );
                displace(new_vals, 0);

                /* Phis do not advance the instruction counter (see the continue
                 * above) */
                instr_nr++;
        }

        /* Remember end-workset for this block */
        block_info->end_workset = workset_clone(ws);
        DB((dbg, DBG_WSETS, "End workset for %+F:\n", block));
        workset_foreach(ws, irn, iter)
                DB((dbg, DBG_WSETS, "  %+F (%u)\n", irn, workset_get_time(ws, iter)));
}
846
847 /**
848  * 'decide' is block-local and makes assumptions
849  * about the set of live-ins. Thus we must adapt the
850  * live-outs to the live-ins at each block-border.
851  */
852 static void fix_block_borders(ir_node *block, void *data)
853 {
854         workset_t *start_workset;
855         int        arity;
856         int        i;
857         unsigned   iter;
858         (void) data;
859
860         DB((dbg, DBG_FIX, "\n"));
861         DB((dbg, DBG_FIX, "Fixing %+F\n", block));
862
863         arity = get_irn_arity(block);
864         /* can happen for endless loops */
865         if (arity == 0)
866                 return;
867
868         start_workset = get_block_info(block)->start_workset;
869
870         /* process all pred blocks */
871         for (i = 0; i < arity; ++i) {
872                 ir_node   *pred = get_Block_cfgpred_block(block, i);
873                 workset_t *pred_end_workset = get_block_info(pred)->end_workset;
874                 ir_node   *node;
875
876                 DB((dbg, DBG_FIX, "  Pred %+F\n", pred));
877
878                 /* spill all values not used anymore */
879                 workset_foreach(pred_end_workset, node, iter) {
880                         ir_node *n2;
881                         unsigned iter2;
882                         bool     found = false;
883                         workset_foreach(start_workset, n2, iter2) {
884                                 if (n2 == node) {
885                                         found = true;
886                                         break;
887                                 }
888                                 /* note that we do not look at phi inputs, becuase the values
889                                  * will be either live-end and need no spill or
890                                  * they have other users in which must be somewhere else in the
891                                  * workset */
892                         }
893
894                         if (found)
895                                 continue;
896
897                         if (move_spills && be_is_live_in(lv, block, node)
898                                         && !pred_end_workset->vals[iter].spilled) {
899                                 ir_node *insert_point;
900                                 if (arity > 1) {
901                                         insert_point = be_get_end_of_block_insertion_point(pred);
902                                         insert_point = sched_prev(insert_point);
903                                 } else {
904                                         insert_point = block;
905                                 }
906                                 DB((dbg, DBG_SPILL, "Spill %+F after %+F\n", node,
907                                      insert_point));
908                                 be_add_spill(senv, node, insert_point);
909                         }
910                 }
911
912                 /* reload missing values in predecessors, add missing spills */
913                 workset_foreach(start_workset, node, iter) {
914                         const loc_t *l    = &start_workset->vals[iter];
915                         const loc_t *pred_loc;
916
917                         /* if node is a phi of the current block we reload
918                          * the corresponding argument, else node itself */
919                         if (is_Phi(node) && get_nodes_block(node) == block) {
920                                 node = get_irn_n(node, i);
921                                 assert(!l->spilled);
922
923                                 /* we might have unknowns as argument for the phi */
924                                 if (!arch_irn_consider_in_reg_alloc(cls, node))
925                                         continue;
926                         }
927
928                         /* check if node is in a register at end of pred */
929                         pred_loc = workset_contains(pred_end_workset, node);
930                         if (pred_loc != NULL) {
931                                 /* we might have to spill value on this path */
932                                 if (move_spills && !pred_loc->spilled && l->spilled) {
933                                         ir_node *insert_point
934                                                 = be_get_end_of_block_insertion_point(pred);
935                                         insert_point = sched_prev(insert_point);
936                                         DB((dbg, DBG_SPILL, "Spill %+F after %+F\n", node,
937                                             insert_point));
938                                         be_add_spill(senv, node, insert_point);
939                                 }
940                         } else {
941                                 /* node is not in register at the end of pred -> reload it */
942                                 DB((dbg, DBG_FIX, "    reload %+F\n", node));
943                                 DB((dbg, DBG_SPILL, "Reload %+F before %+F,%d\n", node, block, i));
944                                 be_add_reload_on_edge(senv, node, block, i, cls, 1);
945                         }
946                 }
947         }
948 }
949
950 static void be_spill_belady(ir_graph *irg, const arch_register_class_t *rcls)
951 {
952         int i;
953
954         be_liveness_assure_sets(be_assure_liveness(irg));
955
956         stat_ev_tim_push();
957         /* construct control flow loop tree */
958         if (! (get_irg_loopinfo_state(irg) & loopinfo_cf_consistent)) {
959                 construct_cf_backedges(irg);
960         }
961         stat_ev_tim_pop("belady_time_backedges");
962
963         stat_ev_tim_push();
964         be_clear_links(irg);
965         stat_ev_tim_pop("belady_time_clear_links");
966
967         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
968
969         /* init belady env */
970         stat_ev_tim_push();
971         obstack_init(&obst);
972         cls       = rcls;
973         lv        = be_get_irg_liveness(irg);
974         n_regs    = cls->n_regs - be_put_ignore_regs(irg, cls, NULL);
975         ws        = new_workset();
976         uses      = be_begin_uses(irg, lv);
977         loop_ana  = be_new_loop_pressure(irg, cls);
978         senv      = be_new_spill_env(irg);
979         blocklist = be_get_cfgpostorder(irg);
980         stat_ev_tim_pop("belady_time_init");
981
982         stat_ev_tim_push();
983         /* walk blocks in reverse postorder */
984         for (i = ARR_LEN(blocklist) - 1; i >= 0; --i) {
985                 process_block(blocklist[i]);
986         }
987         DEL_ARR_F(blocklist);
988         stat_ev_tim_pop("belady_time_belady");
989
990         stat_ev_tim_push();
991         /* belady was block-local, fix the global flow by adding reloads on the
992          * edges */
993         irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);
994         stat_ev_tim_pop("belady_time_fix_borders");
995
996         ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
997
998         /* Insert spill/reload nodes into the graph and fix usages */
999         be_insert_spills_reloads(senv);
1000
1001         /* clean up */
1002         be_delete_spill_env(senv);
1003         be_end_uses(uses);
1004         be_free_loop_pressure(loop_ana);
1005         obstack_free(&obst, NULL);
1006 }
1007
1008 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady);
1009 void be_init_spillbelady(void)
1010 {
1011         static be_spiller_t belady_spiller = {
1012                 be_spill_belady
1013         };
1014         lc_opt_entry_t *be_grp       = lc_opt_get_grp(firm_opt_get_root(), "be");
1015         lc_opt_entry_t *belady_group = lc_opt_get_grp(be_grp, "belady");
1016         lc_opt_add_table(belady_group, options);
1017
1018         be_register_spiller("belady", &belady_spiller);
1019         FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady");
1020 }