fix some phis not getting spilled correctly
[libfirm] / ir / be / bespillbelady.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Beladys spillalgorithm.
23  * @author      Daniel Grund, Matthias Braun
24  * @date        20.09.2005
25  * @version     $Id$
26  */
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30
31 #include <stdbool.h>
32
33 #include "obst.h"
34 #include "irprintf_t.h"
35 #include "irgraph.h"
36 #include "irnode.h"
37 #include "irmode.h"
38 #include "irgwalk.h"
39 #include "irloop.h"
40 #include "iredges_t.h"
41 #include "ircons_t.h"
42 #include "irprintf.h"
43 #include "irnodeset.h"
44 #include "xmalloc.h"
45 #include "pdeq.h"
46
47 #include "beutil.h"
48 #include "bearch_t.h"
49 #include "beuses.h"
50 #include "besched_t.h"
51 #include "beirgmod.h"
52 #include "belive_t.h"
53 #include "benode_t.h"
54 #include "bechordal_t.h"
55 #include "bespilloptions.h"
56 #include "beloopana.h"
57 #include "beirg_t.h"
58 #include "bespill.h"
59 #include "bemodule.h"
60
/* Bit flags passed as the mask argument of the DB() debug messages in this
 * file. Every flag is a distinct power of two. */
#define DBG_SPILL     1   /* reload messages emitted by displace() */
#define DBG_WSETS     2   /* dumps of complete worksets */
#define DBG_FIX       4
#define DBG_DECIDE    8   /* per-value decisions taken while displacing */
#define DBG_START    16   /* construction of a block's start workset */
#define DBG_SLOTS    32
#define DBG_TRACE    64
#define DBG_WORKSET 128
/* debug module handle handed to DB(); only exists when DEBUG_ONLY keeps it */
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
70
/* factor to weight the different costs of reloading/rematerializing a node
   (see bespill.h be_get_reload_costs_no_weight) */
#define RELOAD_COST_FACTOR   10

/* time stored by workset_insert() for a freshly inserted value, i.e. before
 * any next-use distance has been computed for it */
#define TIME_UNDEFINED 6666

/* When defined, displace() registers a spill for every evicted value that
 * still has a finite next use and was not spilled on this path before.
 * Without it, displace() instead removes evicted, not yet used non-Phi
 * values from the start workset of the current block. */
#define PLACE_SPILLS
78
/**
 * An association between a node and a point in time.
 * This is the element type of a workset (see workset_t).
 */
typedef struct loc_t {
        ir_node          *node;     /**< the value this entry describes */
        unsigned          time;     /**< A use time (see beuses.h). */
        bool              spilled;  /**< the value was already spilled on this path */
} loc_t;
87
88 typedef struct _workset_t {
89         int   len;          /**< current length */
90         loc_t vals[0];      /**< inlined array of the values/distances in this working set */
91 } workset_t;
92
/* File-scope state shared by all functions below. */

/** obstack on which worksets and block infos are allocated */
static struct obstack               obst;
/** architecture environment handed to the arch_irn_* queries */
static const arch_env_t            *arch_env;
/** register class that is currently being processed */
static const arch_register_class_t *cls;
/** liveness information, used to enumerate the live-ins of a block */
static const be_lv_t               *lv;
/** loop analysis queried for the register pressure of a loop */
static be_loopana_t                *loop_ana;
/** capacity of every workset (upper bound for workset_t::len) */
static int                          n_regs;
static workset_t                   *ws;     /**< the main workset used while
                                                     processing a block. */
static be_uses_t                   *uses;   /**< env for the next-use magic */
static ir_node                     *instr;  /**< current instruction */
static unsigned                     instr_nr; /**< current instruction number
                                                       (relative to block start) */
/** values that displace() has seen as operands (is_usage != 0) */
static ir_nodeset_t                 used;
static spill_env_t                 *senv;   /**< see bespill.h */
/* NOTE(review): not referenced in the part of the file reviewed here;
 * presumably the queue of blocks still to be processed -- confirm. */
static pdeq                        *worklist;
106 static spill_env_t                 *senv;   /**< see bespill.h */
107 static pdeq                        *worklist;
108
109 static int loc_compare(const void *a, const void *b)
110 {
111         const loc_t *p = a;
112         const loc_t *q = b;
113         return p->time - q->time;
114 }
115
116 void workset_print(const workset_t *w)
117 {
118         int i;
119
120         for(i = 0; i < w->len; ++i) {
121                 ir_fprintf(stderr, "%+F %d\n", w->vals[i].node, w->vals[i].time);
122         }
123 }
124
125 /**
126  * Alloc a new workset on obstack @p ob with maximum size @p max
127  */
128 static workset_t *new_workset(void)
129 {
130         workset_t *res;
131         size_t     size = sizeof(*res) + n_regs * sizeof(res->vals[0]);
132
133         res  = obstack_alloc(&obst, size);
134         memset(res, 0, size);
135         return res;
136 }
137
138 /**
139  * Alloc a new instance on obstack and make it equal to @param workset
140  */
141 static workset_t *workset_clone(workset_t *workset)
142 {
143         workset_t *res;
144         size_t size = sizeof(*res) + n_regs * sizeof(res->vals[0]);
145         res = obstack_alloc(&obst, size);
146         memcpy(res, workset, size);
147         return res;
148 }
149
150 /**
151  * Copy workset @param src to @param tgt
152  */
153 static void workset_copy(workset_t *dest, const workset_t *src)
154 {
155         size_t size = sizeof(*src) + n_regs * sizeof(src->vals[0]);
156         memcpy(dest, src, size);
157 }
158
159 /**
160  * Overwrites the current content array of @param ws with the
161  * @param count locations given at memory @param locs.
162  * Set the length of @param ws to count.
163  */
164 static void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs)
165 {
166         workset->len = count;
167         memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
168 }
169
170 /**
171  * Inserts the value @p val into the workset, iff it is not
172  * already contained. The workset must not be full.
173  */
174 static void workset_insert(workset_t *workset, ir_node *val, bool spilled)
175 {
176         loc_t *loc;
177         int    i;
178         /* check for current regclass */
179         assert(arch_irn_consider_in_reg_alloc(arch_env, cls, val));
180
181         /* check if val is already contained */
182         for (i = 0; i < workset->len; ++i) {
183                 loc = &workset->vals[i];
184                 if (loc->node == val) {
185                         if (spilled) {
186                                 loc->spilled = true;
187                         }
188                         return;
189                 }
190         }
191
192         /* insert val */
193         assert(workset->len < n_regs && "Workset already full!");
194         loc           = &workset->vals[workset->len];
195         loc->node     = val;
196         loc->spilled  = spilled;
197         loc->time     = TIME_UNDEFINED;
198         workset->len++;
199 }
200
201 /**
202  * Removes all entries from this workset
203  */
204 static void workset_clear(workset_t *workset)
205 {
206         workset->len = 0;
207 }
208
209 /**
210  * Removes the value @p val from the workset if present.
211  */
212 static INLINE void workset_remove(workset_t *workset, ir_node *val)
213 {
214         int i;
215         for(i = 0; i < workset->len; ++i) {
216                 if (workset->vals[i].node == val) {
217                         workset->vals[i] = workset->vals[--workset->len];
218                         return;
219                 }
220         }
221 }
222
223 static INLINE const loc_t *workset_contains(const workset_t *ws,
224                                             const ir_node *val)
225 {
226         int i;
227
228         for (i = 0; i < ws->len; ++i) {
229                 if (ws->vals[i].node == val)
230                         return &ws->vals[i];
231         }
232
233         return NULL;
234 }
235
/**
 * Iterates over all values in the working set.
 * @p ws The workset to iterate
 * @p v  A variable to put the current value in
 * @p i  An integer for internal use
 *
 * After the loop has run to completion @p v is NULL and @p i equals the
 * length of the workset.
 *
 * All macro arguments are parenthesized so that arbitrary expressions
 * (e.g. &some_workset, or an index such as n - 1) expand correctly.
 */
#define workset_foreach(ws, v, i) \
        for ((i) = 0; \
             (v) = ((i) < (ws)->len) ? (ws)->vals[(i)].node : NULL, (i) < (ws)->len; \
             ++(i))

/** Sets the time of entry @p i of workset @p ws to @p t. */
#define workset_set_time(ws, i, t) ((ws)->vals[(i)].time = (t))
/** Yields the time of entry @p i of workset @p ws. */
#define workset_get_time(ws, i) ((ws)->vals[(i)].time)
/** Sets the number of valid entries of workset @p ws. */
#define workset_set_length(ws, length) ((ws)->len = (length))
/** Yields the number of valid entries of workset @p ws. */
#define workset_get_length(ws) ((ws)->len)
/** Yields the node of entry @p i of workset @p ws. */
#define workset_get_val(ws, i) ((ws)->vals[(i)].node)
/**
 * Sorts the valid entries of workset @p ws by ascending time.
 * NOTE: the expansion ends in a semicolon of its own; this is kept so that
 * every existing use keeps compiling. Do not use it as the unbraced body of
 * an if that is followed by an else.
 */
#define workset_sort(ws) qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare);
252
/**
 * Per-block result of the algorithm. It is attached to the block through
 * its link field (see get_block_info() / set_block_info()).
 */
typedef struct _block_info_t
{
        workset_t *start_workset;  /**< workset at the beginning of the block */
        workset_t *end_workset;    /**< workset at the end of the block; read by
                                        successors when they build their start
                                        workset */
} block_info_t;
258
259
260 static void *new_block_info(void)
261 {
262         block_info_t *res = obstack_alloc(&obst, sizeof(res[0]));
263         memset(res, 0, sizeof(res[0]));
264
265         return res;
266 }
267
/* The block info is kept in the link field of the block node. A block whose
 * link is NULL is treated as "not processed yet" by the code in this file. */
#define get_block_info(block)        ((block_info_t *)get_irn_link(block))
#define set_block_info(block, info)  set_irn_link(block, info)
270
271 /**
272  * @return The distance to the next use or 0 if irn has dont_spill flag set
273  */
274 static INLINE unsigned get_distance(ir_node *from, unsigned from_step,
275                                     const ir_node *def, int skip_from_uses)
276 {
277         be_next_use_t use;
278         int           flags = arch_irn_get_flags(arch_env, def);
279         unsigned      costs;
280         unsigned      time;
281
282         assert(! (flags & arch_irn_flags_ignore));
283
284         use = be_get_next_use(uses, from, from_step, def, skip_from_uses);
285         if (USES_IS_INFINITE(use.time))
286                 return USES_INFINITY;
287
288         /* We have to keep nonspillable nodes in the workingset */
289         if (flags & arch_irn_flags_dont_spill)
290                 return 0;
291
292         costs = be_get_reload_costs_no_weight(senv, def, use.before);
293         assert(costs * RELOAD_COST_FACTOR < 1000);
294         time  = use.time + 1000 - (costs * RELOAD_COST_FACTOR);
295
296         return time;
297 }
298
/**
 * Performs the actions necessary to grant the request that:
 * - new_vals can be held in registers
 * - as few as possible other values are disposed
 * - the worst values get disposed
 *
 * The function works on the global workset ws and uses the globals instr and
 * instr_nr as the current position.
 *
 * @p new_vals the values that have to be in the workset afterwards
 * @p is_usage indicates that the values in new_vals are used (not defined)
 * In this case reloads must be performed
 */
static void displace(workset_t *new_vals, int is_usage)
{
        /* values to (re)insert in step 3 and, in parallel, whether each of
         * them got a reload in step 1 */
        ir_node **to_insert = alloca(n_regs * sizeof(to_insert[0]));
        bool     *spilled   = alloca(n_regs * sizeof(spilled[0]));
        ir_node  *val;
        int       i;
        int       len;
        int       spills_needed;
        int       demand;
        int       iter;

        /* 1. Identify the number of needed slots and the values to reload */
        demand = 0;
        workset_foreach(new_vals, val, iter) {
                bool reloaded = false;

                /* mark value as used */
                if (is_usage)
                        ir_nodeset_insert(&used, val);

                if (! workset_contains(ws, val)) {
                        DB((dbg, DBG_DECIDE, "    insert %+F\n", val));
                        if (is_usage) {
                                /* a used value that is not in a register has
                                 * to be reloaded in front of the instruction */
                                DB((dbg, DBG_SPILL, "Reload %+F before %+F\n", val, instr));
                                be_add_reload(senv, val, instr, cls, 1);
                                reloaded = true;
                        }
                } else {
                        DB((dbg, DBG_DECIDE, "    %+F already in workset\n", val));
                        /* only a use may refer to a value that is already in
                         * the workset */
                        assert(is_usage);
                        /* remove the value from the current workset so it is not accidently
                         * spilled */
                        workset_remove(ws, val);
                }
                /* every value of new_vals is inserted again in step 3; a
                 * reloaded value enters the workset marked as spilled */
                spilled[demand]   = reloaded;
                to_insert[demand] = val;
                ++demand;
        }

        /* 2. Make room for at least 'demand' slots */
        /* len is taken after the removals of step 1 */
        len           = workset_get_length(ws);
        spills_needed = len + demand - n_regs;
        assert(spills_needed <= len);

        /* Only make more free room if we do not have enough */
        if (spills_needed > 0) {
#ifndef PLACE_SPILLS
                ir_node   *curr_bb  = get_nodes_block(instr);
                workset_t *ws_start = get_block_info(curr_bb)->start_workset;
#endif

                DB((dbg, DBG_DECIDE, "    disposing %d values\n", spills_needed));

                /* calculate current next-use distance for live values */
                for (i = 0; i < len; ++i) {
                        ir_node  *val  = workset_get_val(ws, i);
                        unsigned  dist = get_distance(instr, instr_nr, val, !is_usage);
                        workset_set_time(ws, i, dist);
                }

                /* sort entries by increasing nextuse-distance*/
                workset_sort(ws);

                /* the last spills_needed entries are the ones with the
                 * largest distance: these get evicted */
                for (i = len - spills_needed; i < len; ++i) {
                        ir_node *val = ws->vals[i].node;

                        DB((dbg, DBG_DECIDE, "    disposing node %+F (%u)\n", val,
                             workset_get_time(ws, i)));

#ifdef PLACE_SPILLS
                        /* no spill is needed for a value without further use
                         * or for one that was spilled on this path already */
                        if (!USES_IS_INFINITE(ws->vals[i].time) && !ws->vals[i].spilled) {
                                ir_node *after_pos = sched_prev(instr);
                                DB((dbg, DBG_DECIDE, "Spill %+F after node %+F\n", val,
                                    after_pos));
                                be_add_spill(senv, val, after_pos);
                        }
#endif

#ifndef PLACE_SPILLS
                        /* Logic for not needed live-ins: If a value is disposed
                         * before its first use, remove it from start workset
                         * We don't do this for phis though     */
                        if (!is_Phi(val) && ! ir_nodeset_contains(&used, val)) {
                                workset_remove(ws_start, val);
                                DB((dbg, DBG_DECIDE, "    (and removing %+F from start workset)\n", val));
                        }
#endif
                }

                /* kill the last 'demand' entries in the array */
                workset_set_length(ws, len - spills_needed);
        }

        /* 3. Insert the new values into the workset */
        for (i = 0; i < demand; ++i) {
                ir_node *val = to_insert[i];

                workset_insert(ws, val, spilled[i]);
        }
}
408
/**
 * Availability of a value in the end worksets of the predecessors of a
 * block (result of available_in_all_preds()).
 */
enum {
        AVAILABLE_EVERYWHERE,  /**< contained in the end workset of every predecessor */
        AVAILABLE_NOWHERE,     /**< contained in no predecessor's end workset */
        AVAILABLE_PARTLY,      /**< contained in some but not all of them */
        AVAILABLE_UNKNOWN      /**< used by decide_start_workset() when not all
                                    predecessors have a block info yet */
};
415
416 static unsigned available_in_all_preds(workset_t* const* pred_worksets,
417                                        size_t n_pred_worksets,
418                                        const ir_node *value, bool is_local_phi)
419 {
420         size_t i;
421         bool   avail_everywhere = true;
422         bool   avail_nowhere    = true;
423
424         assert(n_pred_worksets > 0);
425
426         /* value available in all preds? */
427         for (i = 0; i < n_pred_worksets; ++i) {
428                 bool             found     = false;
429                 const workset_t *p_workset = pred_worksets[i];
430                 int              p_len     = workset_get_length(p_workset);
431                 int              p_i;
432                 const ir_node   *l_value;
433
434                 if (is_local_phi) {
435                         assert(is_Phi(value));
436                         l_value = get_irn_n(value, i);
437                 } else {
438                         l_value = value;
439                 }
440
441                 for (p_i = 0; p_i < p_len; ++p_i) {
442                         const loc_t *p_l = &p_workset->vals[p_i];
443                         if (p_l->node != l_value)
444                                 continue;
445
446                         found = true;
447                         break;
448                 }
449
450                 if (found) {
451                         avail_nowhere = false;
452                 } else {
453                         avail_everywhere = false;
454                 }
455         }
456
457         if (avail_everywhere) {
458                 assert(!avail_nowhere);
459                 return AVAILABLE_EVERYWHERE;
460         } else if (avail_nowhere) {
461                 return AVAILABLE_NOWHERE;
462         } else {
463                 return AVAILABLE_PARTLY;
464         }
465 }
466
467 /** Decides whether a specific node should be in the start workset or not
468  *
469  * @param env      belady environment
470  * @param first
471  * @param node     the node to test
472  * @param loop     the loop of the node
473  */
474 static loc_t to_take_or_not_to_take(ir_node* first, ir_node *node,
475                                     ir_loop *loop, unsigned available)
476 {
477         be_next_use_t next_use;
478         loc_t         loc;
479
480         loc.time    = USES_INFINITY;
481         loc.node    = node;
482         loc.spilled = false;
483
484         if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node)) {
485                 loc.time = USES_INFINITY;
486                 return loc;
487         }
488
489         /* We have to keep nonspillable nodes in the workingset */
490         if (arch_irn_get_flags(arch_env, node) & arch_irn_flags_dont_spill) {
491                 loc.time = 0;
492                 DB((dbg, DBG_START, "    %+F taken (dontspill node)\n", node, loc.time));
493                 return loc;
494         }
495
496         next_use = be_get_next_use(uses, first, 0, node, 0);
497         if (USES_IS_INFINITE(next_use.time)) {
498                 // the nodes marked as live in shouldn't be dead, so it must be a phi
499                 assert(is_Phi(node));
500                 loc.time = USES_INFINITY;
501                 DB((dbg, DBG_START, "    %+F not taken (dead)\n", node));
502                 return loc;
503         }
504
505         loc.time = next_use.time;
506
507         if (available == AVAILABLE_EVERYWHERE) {
508                 DB((dbg, DBG_START, "    %+F taken (%u, live in all preds)\n", node,
509                     loc.time));
510                 return loc;
511         } else if(available == AVAILABLE_NOWHERE) {
512                 DB((dbg, DBG_START, "    %+F not taken (%u, live in no pred)\n", node,
513                     loc.time));
514                 loc.time = USES_INFINITY;
515                 return loc;
516         }
517
518         if (next_use.outermost_loop >= get_loop_depth(loop)) {
519                 DB((dbg, DBG_START, "    %+F taken (%u, loop %d)\n", node, loc.time,
520                     next_use.outermost_loop));
521         } else {
522                 loc.time = USES_PENDING;
523                 DB((dbg, DBG_START, "    %+F delayed (outerdepth %d < loopdepth %d)\n",
524                     node, next_use.outermost_loop, get_loop_depth(loop)));
525         }
526         return loc;
527 }
528
529 /**
530  * Computes the start-workset for a block with multiple predecessors. We assume
531  * that at least 1 of the predeccesors is a back-edge which means we're at the
532  * beginning of a loop. We try to reload as much values as possible now so they
533  * don't get reloaded inside the loop.
534  */
535 static void decide_start_workset(const ir_node *block)
536 {
537         ir_loop    *loop = get_irn_loop(block);
538         ir_node    *first;
539         ir_node    *node;
540         loc_t       loc;
541         loc_t      *starters;
542         loc_t      *delayed;
543         int         i, len, ws_count;
544         int             free_slots, free_pressure_slots;
545         unsigned    pressure;
546         int         arity;
547         workset_t **pred_worksets;
548         bool        all_preds_known;
549
550         /* check predecessors */
551         arity           = get_irn_arity(block);
552         pred_worksets   = alloca(sizeof(pred_worksets[0]) * arity);
553         all_preds_known = true;
554         for(i = 0; i < arity; ++i) {
555                 ir_node      *pred_block = get_Block_cfgpred_block(block, i);
556                 block_info_t *pred_info  = get_block_info(pred_block);
557
558                 if (pred_info == NULL) {
559                         pred_worksets[i] = NULL;
560                         all_preds_known  = false;
561                 } else {
562                         pred_worksets[i] = pred_info->end_workset;
563                 }
564         }
565
566         /* Collect all values living at start of block */
567         starters = NEW_ARR_F(loc_t, 0);
568         delayed  = NEW_ARR_F(loc_t, 0);
569
570         DB((dbg, DBG_START, "Living at start of %+F:\n", block));
571         first = sched_first(block);
572
573         /* check all Phis first */
574         sched_foreach(block, node) {
575                 unsigned available;
576
577                 if (! is_Phi(node))
578                         break;
579                 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node))
580                         continue;
581
582                 if (all_preds_known) {
583                         available = available_in_all_preds(pred_worksets, arity, node, true);
584                 } else {
585                         available = AVAILABLE_UNKNOWN;
586                 }
587
588                 loc = to_take_or_not_to_take(first, node, loop, available);
589
590                 if (! USES_IS_INFINITE(loc.time)) {
591                         if (USES_IS_PENDING(loc.time))
592                                 ARR_APP1(loc_t, delayed, loc);
593                         else
594                                 ARR_APP1(loc_t, starters, loc);
595                 } else {
596                         be_spill_phi(senv, node);
597                 }
598         }
599
600         /* check all Live-Ins */
601         be_lv_foreach(lv, block, be_lv_state_in, i) {
602                 ir_node *node = be_lv_get_irn(lv, block, i);
603                 unsigned available;
604
605                 if (all_preds_known) {
606                         available = available_in_all_preds(pred_worksets, arity, node, false);
607                 } else {
608                         available = AVAILABLE_UNKNOWN;
609                 }
610
611                 loc = to_take_or_not_to_take(first, node, loop, available);
612
613                 if (! USES_IS_INFINITE(loc.time)) {
614                         if (USES_IS_PENDING(loc.time))
615                                 ARR_APP1(loc_t, delayed, loc);
616                         else
617                                 ARR_APP1(loc_t, starters, loc);
618                 }
619         }
620
621         pressure            = be_get_loop_pressure(loop_ana, cls, loop);
622         assert(ARR_LEN(delayed) <= (signed)pressure);
623         free_slots          = n_regs - ARR_LEN(starters);
624         free_pressure_slots = n_regs - (pressure - ARR_LEN(delayed));
625         free_slots          = MIN(free_slots, free_pressure_slots);
626
627         /* so far we only put nodes into the starters list that are used inside
628          * the loop. If register pressure in the loop is low then we can take some
629          * values and let them live through the loop */
630         if (free_slots > 0) {
631                 qsort(delayed, ARR_LEN(delayed), sizeof(delayed[0]), loc_compare);
632
633                 for (i = 0; i < ARR_LEN(delayed) && i < free_slots; ++i) {
634                         int    p, arity;
635                         loc_t *loc = & delayed[i];
636
637                         /* don't use values which are dead in a known predecessors
638                          * to not induce unnecessary reloads */
639                         arity = get_irn_arity(block);
640                         for (p = 0; p < arity; ++p) {
641                                 ir_node      *pred_block = get_Block_cfgpred_block(block, p);
642                                 block_info_t *pred_info  = get_block_info(pred_block);
643
644                                 if (pred_info == NULL)
645                                         continue;
646
647                                 if (!workset_contains(pred_info->end_workset, loc->node)) {
648                                         DB((dbg, DBG_START,
649                                             "    delayed %+F not live at pred %+F\n", loc->node,
650                                             pred_block));
651                                         goto skip_delayed;
652                                 }
653                         }
654
655                         DB((dbg, DBG_START, "    delayed %+F taken\n", loc->node));
656                         ARR_APP1(loc_t, starters, *loc);
657                         loc->node = NULL;
658                 skip_delayed:
659                         ;
660                 }
661         }
662
663         /* spill phis (the actual phis not just their values) that are in this block
664          * but not in the start workset */
665         for (i = ARR_LEN(delayed) - 1; i >= 0; --i) {
666                 ir_node *node = delayed[i].node;
667                 if (node == NULL || !is_Phi(node) || get_nodes_block(node) != block)
668                         continue;
669
670                 DB((dbg, DBG_START, "    spilling delayed phi %+F\n", node));
671                 be_spill_phi(senv, node);
672         }
673         DEL_ARR_F(delayed);
674
675         /* Sort start values by first use */
676         qsort(starters, ARR_LEN(starters), sizeof(starters[0]), loc_compare);
677
678         /* Copy the best ones from starters to start workset */
679         ws_count = MIN(ARR_LEN(starters), n_regs);
680         workset_clear(ws);
681         workset_bulk_fill(ws, ws_count, starters);
682
683         /* spill phis (the actual phis not just their values) that are in this block
684          * but not in the start workset */
685         len = ARR_LEN(starters);
686         for (i = ws_count; i < len; ++i) {
687                 ir_node *node = starters[i].node;
688                 if (! is_Phi(node) || get_nodes_block(node) != block)
689                         continue;
690
691                 DB((dbg, DBG_START, "    spilling phi %+F\n", node));
692                 be_spill_phi(senv, node);
693         }
694
695         DEL_ARR_F(starters);
696
697         /* determine spill status of the values: If there's 1 pred block (which
698          * is no backedge) where the value is spilled then we must set it to
699          * spilled here. */
700         for(i = 0; i < ws_count; ++i) {
701                 loc_t   *loc     = &ws->vals[i];
702                 ir_node *value   = loc->node;
703                 bool     spilled;
704                 int      n;
705
706                 /* phis from this block aren't spilled */
707                 if (get_nodes_block(value) == block) {
708                         assert(is_Phi(value));
709                         loc->spilled = false;
710                         continue;
711                 }
712
713                 /* determine if value was spilled on any predecessor */
714                 spilled = false;
715                 for(n = 0; n < arity; ++n) {
716                         workset_t *pred_workset = pred_worksets[n];
717                         int        p_len;
718                         int        p;
719
720                         if (pred_workset == NULL)
721                                 continue;
722
723                         p_len = workset_get_length(pred_workset);
724                         for(p = 0; p < p_len; ++p) {
725                                 loc_t *l = &pred_workset->vals[p];
726
727                                 if (l->node != value)
728                                         continue;
729
730                                 if (l->spilled) {
731                                         spilled = true;
732                                 }
733                                 break;
734                         }
735                 }
736
737                 loc->spilled = spilled;
738         }
739 }
740
/* Disabled alternative to decide_start_workset(): when every predecessor has
 * been processed it builds the start workset from the values contained in
 * the end worksets of all predecessors. It is excluded from compilation by
 * the #if 0; note that its last two statements use ws_count and starters,
 * neither of which is declared in the function. */
#if 0
static void decide_start_workset2(const ir_node *block)
{
        int         arity;
        workset_t **pred_worksets;
        int         p;
        int         i;
        int         len;

        /* check if all predecessors are known */
        arity           = get_irn_arity(block);
        pred_worksets   = alloca(sizeof(pred_worksets[0]) * arity);
        for (i = 0; i < arity; ++i) {
                ir_node      *pred_block = get_Block_cfgpred_block(block, i);
                block_info_t *pred_info  = get_block_info(pred_block);

                if (pred_info == NULL) {
                        /* not all predecessors known, use decide_start_workset */
                        decide_start_workset(block);
                        return;
                }

                pred_worksets[i] = pred_info->end_workset;
        }

        /* we construct a new workset */
        workset_clear(ws);

        /* take values live in all pred blocks */
        len = workset_get_length(pred_worksets[0]);
        for (p = 0; p < len; ++p) {
                const loc_t *l = &pred_worksets[0]->vals[p];
                ir_node     *value;
                bool         spilled = false;

                if (USES_IS_INFINITE(l->time))
                        continue;

                /* value available in all preds? */
                value = l->node;
                for (i = 1; i < arity; ++i) {
                        bool       found     = false;
                        workset_t *p_workset = pred_worksets[i];
                        int        p_len     = workset_get_length(p_workset);
                        int        p_i;

                        for (p_i = 0; p_i < p_len; ++p_i) {
                                const loc_t *p_l = &p_workset->vals[p_i];
                                if (p_l->node != value)
                                        continue;

                                found = true;
                                if (p_l->spilled)
                                        spilled = true;
                                break;
                        }

                        /* leaving the loop early keeps i < arity, which the
                         * check below uses as the "not found" signal */
                        if (!found)
                                break;
                }

                /* it was available in all preds */
                if (i >= arity) {
                        workset_insert(ws, value, spilled);
                }
        }

        /* Copy the best ones from starters to start workset */
        ws_count = MIN(ARR_LEN(starters), n_regs);
        workset_bulk_fill(ws, ws_count, starters);
}
#endif
813
814 /**
815  * For the given block @p block, decide for each values
816  * whether it is used from a register or is reloaded
817  * before the use.
818  */
819 static void belady(ir_node *block)
820 {
821         workset_t       *new_vals;
822         ir_node         *irn;
823         int              iter;
824         block_info_t    *block_info;
825         int              i, arity;
826         int              has_backedges = 0;
827         //int              first         = 0;
828         const ir_edge_t *edge;
829
830         /* no need to process a block twice */
831         if (get_block_info(block) != NULL) {
832                 return;
833         }
834
835         /* check if all predecessor blocks are processed yet (though for backedges
836          * we have to make an exception as we can't process them first) */
837         arity = get_Block_n_cfgpreds(block);
838         for(i = 0; i < arity; ++i) {
839                 ir_node      *pred_block = get_Block_cfgpred_block(block, i);
840                 block_info_t *pred_info  = get_block_info(pred_block);
841
842                 if (pred_info == NULL) {
843                         /* process predecessor first (it will be in the queue already) */
844                         if (!is_backedge(block, i)) {
845                                 return;
846                         }
847                         has_backedges = 1;
848                 }
849         }
850         (void) has_backedges;
851         if (arity == 0) {
852                 workset_clear(ws);
853         } else if (arity == 1) {
854                 ir_node      *pred_block = get_Block_cfgpred_block(block, 0);
855                 block_info_t *pred_info  = get_block_info(pred_block);
856
857                 assert(pred_info != NULL);
858                 workset_copy(ws, pred_info->end_workset);
859         } else {
860                 /* we need 2 heuristics here, for the case when all predecessor blocks
861                  * are known and when some are backedges (and therefore can't be known
862                  * yet) */
863                 decide_start_workset(block);
864         }
865
866         DB((dbg, DBG_DECIDE, "\n"));
867         DB((dbg, DBG_DECIDE, "Decide for %+F\n", block));
868
869         block_info = new_block_info();
870         set_block_info(block, block_info);
871
872         DB((dbg, DBG_WSETS, "Start workset for %+F:\n", block));
873         workset_foreach(ws, irn, iter) {
874                 DB((dbg, DBG_WSETS, "  %+F (%u)\n", irn,
875                      workset_get_time(ws, iter)));
876         }
877
878         block_info->start_workset = workset_clone(ws);
879
880         /* process the block from start to end */
881         DB((dbg, DBG_WSETS, "Processing...\n"));
882         ir_nodeset_init(&used);
883         instr_nr = 0;
884         /* TODO: this leaks (into the obstack)... */
885         new_vals = new_workset();
886
887         sched_foreach(block, irn) {
888                 int i, arity;
889                 assert(workset_get_length(ws) <= n_regs);
890
891                 /* Phis are no real instr (see insert_starters()) */
892                 if (is_Phi(irn)) {
893                         continue;
894                 }
895                 DB((dbg, DBG_DECIDE, "  ...%+F\n", irn));
896
897                 /* set instruction in the workset */
898                 instr = irn;
899
900                 /* allocate all values _used_ by this instruction */
901                 workset_clear(new_vals);
902                 for(i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
903                         ir_node *in = get_irn_n(irn, i);
904                         if (!arch_irn_consider_in_reg_alloc(arch_env, cls, in))
905                                 continue;
906
907                         /* (note that "spilled" is irrelevant here) */
908                         workset_insert(new_vals, in, false);
909                 }
910                 displace(new_vals, 1);
911
912                 /* allocate all values _defined_ by this instruction */
913                 workset_clear(new_vals);
914                 if (get_irn_mode(irn) == mode_T) {
915                         const ir_edge_t *edge;
916
917                         foreach_out_edge(irn, edge) {
918                                 ir_node *proj = get_edge_src_irn(edge);
919                                 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, proj))
920                                         continue;
921                                 workset_insert(new_vals, proj, false);
922                         }
923                 } else {
924                         if (!arch_irn_consider_in_reg_alloc(arch_env, cls, irn))
925                                 continue;
926                         workset_insert(new_vals, irn, false);
927                 }
928                 displace(new_vals, 0);
929
930                 instr_nr++;
931         }
932         ir_nodeset_destroy(&used);
933
934         /* Remember end-workset for this block */
935         block_info->end_workset = workset_clone(ws);
936         DB((dbg, DBG_WSETS, "End workset for %+F:\n", block));
937         workset_foreach(ws, irn, iter)
938                 DB((dbg, DBG_WSETS, "  %+F (%u)\n", irn,
939                      workset_get_time(ws, iter)));
940
941         /* add successor blocks into worklist */
942         foreach_block_succ(block, edge) {
943                 ir_node *succ = get_edge_src_irn(edge);
944                 pdeq_putr(worklist, succ);
945         }
946 }
947
948 /**
949  * 'decide' is block-local and makes assumptions
950  * about the set of live-ins. Thus we must adapt the
951  * live-outs to the live-ins at each block-border.
952  */
953 static void fix_block_borders(ir_node *block, void *data)
954 {
955         workset_t    *start_workset;
956         int           arity;
957         int           i;
958         int           iter;
959         (void) data;
960
961         DB((dbg, DBG_FIX, "\n"));
962         DB((dbg, DBG_FIX, "Fixing %+F\n", block));
963
964         start_workset = get_block_info(block)->start_workset;
965
966         /* process all pred blocks */
967         arity = get_irn_arity(block);
968         for (i = 0; i < arity; ++i) {
969                 ir_node   *pred = get_Block_cfgpred_block(block, i);
970                 workset_t *pred_end_workset = get_block_info(pred)->end_workset;
971                 ir_node   *node;
972
973                 DB((dbg, DBG_FIX, "  Pred %+F\n", pred));
974
975                 /* spill all values not used anymore */
976                 workset_foreach(pred_end_workset, node, iter) {
977                         ir_node *n2;
978                         int      iter2;
979                         bool     found = false;
980                         workset_foreach(start_workset, n2, iter2) {
981                                 if (n2 == node) {
982                                         found = true;
983                                         break;
984                                 }
985                                 /* note that we do not look at phi inputs, becuase the values
986                                  * will be either live-end and need no spill or
987                                  * they have other users in which must be somewhere else in the
988                                  * workset */
989                         }
990
991                         if (found)
992                                 continue;
993
994 #ifdef PLACE_SPILLS
995                         if (be_is_live_in(lv, block, node)
996                                         && !pred_end_workset->vals[iter].spilled) {
997                                 ir_node *insert_point;
998                                 if (arity > 1) {
999                                         insert_point = be_get_end_of_block_insertion_point(pred);
1000                                         insert_point = sched_prev(insert_point);
1001                                 } else {
1002                                         insert_point = block;
1003                                 }
1004                                 DB((dbg, DBG_SPILL, "Spill %+F after %+F\n", node,
1005                                      insert_point));
1006                                 be_add_spill(senv, node, insert_point);
1007                         }
1008 #endif
1009                 }
1010
1011                 /* reload missing values in predecessors, add missing spills */
1012                 workset_foreach(start_workset, node, iter) {
1013                         const loc_t *l    = &start_workset->vals[iter];
1014                         const loc_t *pred_loc;
1015
1016                         /* if node is a phi of the current block we reload
1017                          * the corresponding argument, else node itself */
1018                         if (is_Phi(node) && get_nodes_block(node) == block) {
1019                                 node = get_irn_n(node, i);
1020                                 assert(!l->spilled);
1021
1022                                 /* we might have unknowns as argument for the phi */
1023                                 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node))
1024                                         continue;
1025                         }
1026
1027                         /* check if node is in a register at end of pred */
1028                         pred_loc = workset_contains(pred_end_workset, node);
1029                         if (pred_loc != NULL) {
1030 #ifdef PLACE_SPILLS
1031                                 /* we might have to spill value on this path */
1032                                 if (!pred_loc->spilled && l->spilled) {
1033                                         ir_node *insert_point
1034                                                 = be_get_end_of_block_insertion_point(pred);
1035                                         insert_point = sched_prev(insert_point);
1036                                         DB((dbg, DBG_SPILL, "Spill %+F after %+F\n", node,
1037                                             insert_point));
1038                                         be_add_spill(senv, node, insert_point);
1039                                 }
1040 #endif
1041                         } else {
1042                                 /* node is not in register at the end of pred -> reload it */
1043                                 DB((dbg, DBG_FIX, "    reload %+F\n", node));
1044                                 DB((dbg, DBG_SPILL, "Reload %+F before %+F,%d\n", node, block, i));
1045                                 be_add_reload_on_edge(senv, node, block, i, cls, 1);
1046                         }
1047                 }
1048         }
1049 }
1050
1051 static void be_spill_belady(be_irg_t *birg, const arch_register_class_t *rcls)
1052 {
1053         ir_graph *irg = be_get_birg_irg(birg);
1054
1055         be_liveness_assure_sets(be_assure_liveness(birg));
1056
1057         /* construct control flow loop tree */
1058         if (! (get_irg_loopinfo_state(irg) & loopinfo_cf_consistent)) {
1059                 construct_cf_backedges(irg);
1060         }
1061
1062         be_clear_links(irg);
1063
1064         /* init belady env */
1065         obstack_init(&obst);
1066         arch_env = birg->main_env->arch_env;
1067         cls      = rcls;
1068         lv       = be_get_birg_liveness(birg);
1069         n_regs   = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
1070         ws       = new_workset();
1071         uses     = be_begin_uses(irg, lv);
1072         loop_ana = be_new_loop_pressure(birg);
1073         senv     = be_new_spill_env(birg);
1074         worklist = new_pdeq();
1075
1076         pdeq_putr(worklist, get_irg_start_block(irg));
1077
1078         while(!pdeq_empty(worklist)) {
1079                 ir_node *block = pdeq_getl(worklist);
1080                 belady(block);
1081         }
1082         /* end block might not be reachable in endless loops */
1083         belady(get_irg_end_block(irg));
1084
1085         del_pdeq(worklist);
1086
1087         /* belady was block-local, fix the global flow by adding reloads on the
1088          * edges */
1089         irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);
1090
1091         /* Insert spill/reload nodes into the graph and fix usages */
1092         be_insert_spills_reloads(senv);
1093
1094         /* clean up */
1095         be_delete_spill_env(senv);
1096         be_end_uses(uses);
1097         be_free_loop_pressure(loop_ana);
1098         obstack_free(&obst, NULL);
1099 }
1100
1101 void be_init_spillbelady(void)
1102 {
1103         static be_spiller_t belady_spiller = {
1104                 be_spill_belady
1105         };
1106
1107         be_register_spiller("belady", &belady_spiller);
1108         FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady");
1109 }
1110
1111 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady);