Do not mark the transformed node as visited: doing so makes no sense here.
[libfirm] / ir / be / bespillbelady3.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       MIN-algorithm with some twists (aka belady spiller v3)
23  * @author      Matthias Braun
24  * @date        15.01.2008
25  * @version     $Id$
26  *
27  * TODO:
28  *   - handle phis correctly, decide whether we should spill them ~ok
29  *   - merge multiple start worksets of blocks ~ok
30  *   - how to and when to do the tentative phase...
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include <stdbool.h>
37
38 #include "debug.h"
39 #include "list.h"
40 #include "pdeq.h"
41
42 #include "irnode_t.h"
43 #include "irprintf.h"
44 #include "iredges_t.h"
45 #include "irloop_t.h"
46 #include "irgwalk.h"
47 #include "execfreq.h"
48
49 #include "bemodule.h"
50 #include "bespill.h"
51 #include "beutil.h"
52 #include "bespilloptions.h"
53 #include "besched_t.h"
54 #include "be_t.h"
55
56 #ifndef NDEBUG
57 #define EXPENSIVE_CHECKS
58 #endif
59
60 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
61
62 typedef struct loop_edge_t loop_edge_t;
63 struct loop_edge_t {
64         ir_node     *block;
65         int          pos;
66         loop_edge_t *next;
67 };
68
typedef struct loop_info_t loop_info_t;
/** Per-loop bookkeeping: boundary edges and observed register pressure. */
struct loop_info_t {
	ir_loop     *loop;                  /**< the loop this info belongs to */
	loop_edge_t *entry_edges;           /**< edges entering the loop */
	loop_edge_t *exit_edges;            /**< edges leaving the loop */
	size_t       max_register_pressure; /**< maximum pressure seen inside the loop */
};
76
typedef struct worklist_entry_t worklist_entry_t;
/** A single value tracked in a worklist. */
struct worklist_entry_t {
	struct list_head  head;                    /**< list handle for worklist_t::live_values */
	ir_node          *value;                   /**< the live value itself */
	ir_node          *reload_point;            /**< node before which a reload has to be placed */
	ir_loop          *unused_livethrough_loop; /**< loop this value lives through unused, if any */
};
84
typedef struct worklist_t worklist_t;
/** An ordered list of live values (the "workset"); eviction candidates are
 * taken from the front, recently used values are moved to the back. */
struct worklist_t {
	struct list_head  live_values;   /**< list of worklist_entry_t */
	size_t            n_live_values; /**< number of entries in live_values */
	ir_visited_t      visited;       /**< generation mark, compared against worklist_visited */
};
91
typedef struct block_info_t block_info_t;
/** Start and end worksets computed for a basic block. */
struct block_info_t {
	worklist_t *start_worklist; /**< workset at the beginning of the block */
	worklist_t *end_worklist;   /**< workset at the end of the block */
};
97
static const arch_env_t            *arch_env;              /**< backend architecture environment */
static const arch_register_class_t *cls;                   /**< register class being processed */
static struct obstack               obst;                  /**< obstack for all spiller allocations */
static spill_env_t                 *senv;                  /**< spill environment collecting spills/reloads */
static size_t                       n_regs;                /**< number of usable registers in cls */
static size_t                       max_register_pressure; /**< maximum register pressure seen so far */
static bool                         tentative_mode;        /**< if set, no spills/reloads are actually emitted */
static bool                         should_have_reached_fixpoint; /**< enables fixpoint assertions (debug only) */
static bool                         do_push_unused_livethroughs;  /**< enables the unused-livethrough pushing phase */
static ir_exec_freq                *exec_freq;             /**< execution frequency information */
static ir_visited_t                 worklist_visited;      /**< generation counter for worklist visits */
109
110 static worklist_t *new_worklist(void)
111 {
112         worklist_t *worklist = obstack_alloc(&obst, sizeof(worklist[0]));
113         memset(worklist, 0, sizeof(worklist[0]));
114
115         INIT_LIST_HEAD(&worklist->live_values);
116         worklist->n_live_values    = 0;
117
118         return worklist;
119 }
120
/**
 * Mark @p node as not visited by setting its visited counter one below the
 * graph's current visited counter.
 */
static void mark_irn_not_visited(ir_node *node)
{
	set_irn_visited(node, get_irg_visited(current_ir_graph) - 1);
}
125
/**
 * Check whether @p node is currently part of the active worklist.
 * Membership is encoded via the node's visited flag, see
 * activate_worklist()/deactivate_worklist().
 */
static bool worklist_contains(const ir_node *node)
{
	return irn_visited(node);
}
130
131 static block_info_t *get_block_info(ir_node *block)
132 {
133         block_info_t *info = get_irn_link(block);
134         if (info != NULL)
135                 return info;
136
137         info = obstack_alloc(&obst, sizeof(info[0]));
138         memset(info, 0, sizeof(info[0]));
139         set_irn_link(block, info);
140         return info;
141 }
142
143 static void deactivate_worklist(const worklist_t *worklist)
144 {
145         struct list_head *entry;
146
147         list_for_each(entry, &worklist->live_values) {
148                 worklist_entry_t *wl_entry
149                         = list_entry(entry, worklist_entry_t, head);
150                 assert(worklist_contains(wl_entry->value));
151                 mark_irn_not_visited(wl_entry->value);
152                 set_irn_link(wl_entry->value, NULL);
153         }
154 }
155
156 static void activate_worklist(const worklist_t *worklist)
157 {
158         struct list_head *entry;
159
160         list_for_each(entry, &worklist->live_values) {
161                 worklist_entry_t *wl_entry
162                         = list_entry(entry, worklist_entry_t, head);
163                 assert(!worklist_contains(wl_entry->value));
164                 mark_irn_visited(wl_entry->value);
165                 set_irn_link(wl_entry->value, wl_entry);
166         }
167 }
168
169 /**
170  * duplicate a worklist and directly activates it
171  */
172 static void fill_and_activate_worklist(worklist_t *new_worklist,
173                 const worklist_t *worklist, ir_node *block, ir_node *succ_block,
174                 int succ_pos)
175 {
176         ir_node          *reload_point  = NULL;
177         size_t            n_live_values = 0;
178         struct list_head *entry;
179
180         if (succ_block != NULL &&
181                         (get_Block_n_cfgpreds(succ_block) > 1
182                          || get_irn_n_edges_kind(block, EDGE_KIND_BLOCK) > 1)) {
183                 reload_point = be_get_end_of_block_insertion_point(block);
184         }
185
186         list_for_each(entry, &worklist->live_values) {
187                 worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
188                 ir_node          *value     = wl_entry->value;
189                 worklist_entry_t *new_entry;
190
191                 if (new_worklist->n_live_values >= n_regs)
192                         break;
193
194                 if (is_Phi(value) && get_nodes_block(value) == succ_block) {
195                         value = get_Phi_pred(value, succ_pos);
196
197                         /* can happen for unknown phi preds */
198                         if (!arch_irn_consider_in_reg_alloc(arch_env, cls, value))
199                                 continue;
200                 }
201
202                 if (irn_visited_else_mark(value))
203                         continue;
204
205                 new_entry = obstack_alloc(&obst, sizeof(new_entry[0]));
206                 memset(new_entry, 0, sizeof(new_entry[0]));
207
208                 new_entry->value = value;
209                 if (reload_point != NULL) {
210                         new_entry->reload_point = reload_point;
211                 } else {
212                         new_entry->reload_point = wl_entry->reload_point;
213                 }
214
215                 list_add_tail(&new_entry->head, &new_worklist->live_values);
216                 ++n_live_values;
217
218                 set_irn_link(value, new_entry);
219                 new_worklist->n_live_values++;
220         }
221 }
222
223 static worklist_t *duplicate_worklist(const worklist_t *worklist)
224 {
225         worklist_t       *new_worklist;
226         struct list_head *entry;
227
228         new_worklist = obstack_alloc(&obst, sizeof(new_worklist[0]));
229         memset(new_worklist, 0, sizeof(new_worklist[0]));
230         INIT_LIST_HEAD(&new_worklist->live_values);
231         new_worklist->n_live_values = worklist->n_live_values;
232
233         list_for_each(entry, &worklist->live_values) {
234                 worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
235                 worklist_entry_t *new_entry
236                         = obstack_alloc(&obst, sizeof(new_entry[0]));
237
238                 memcpy(new_entry, wl_entry, sizeof(new_entry[0]));
239                 list_add_tail(&new_entry->head, &new_worklist->live_values);
240         }
241
242         return new_worklist;
243 }
244
245 #ifdef DEBUG_libfirm
246 static void print_worklist(const worklist_t *worklist, int level)
247 {
248         struct list_head *entry;
249
250         DB((dbg, level, "%d values: ", worklist->n_live_values));
251         list_for_each(entry, &worklist->live_values) {
252                 worklist_entry_t *wl_entry
253                         = list_entry(entry, worklist_entry_t, head);
254
255                 DB((dbg, level, "%+F ", wl_entry->value));
256         }
257 }
258 #endif
259
260
261
262 static void clear_loop_info(ir_loop *loop)
263 {
264         int n_elements = get_loop_n_elements(loop);
265         int i;
266
267         loop->link = NULL;
268         for (i = 0; i < n_elements; ++i) {
269                 loop_element element = get_loop_element(loop, i);
270                 if (*element.kind != k_ir_loop)
271                         continue;
272
273                 clear_loop_info(element.son);
274         }
275 }
276
277 static loop_info_t *get_loop_info(ir_loop *loop)
278 {
279         loop_info_t *info = loop->link;
280         if (info != NULL)
281                 return info;
282
283         info = obstack_alloc(&obst, sizeof(info[0]));
284         memset(info, 0, sizeof(info[0]));
285         info->loop = loop;
286         loop->link = info;
287         return info;
288 }
289
/**
 * Constructs loop entry/exit edges when called in a block walker.
 *
 * For every control-flow predecessor of @p block that lies in a different
 * loop, a loop_edge_t is registered with every loop level crossed between
 * the predecessor's loop and the block's loop.
 */
static void construct_loop_edges(ir_node *block, void *data)
{
	int      n_cfgpreds = get_Block_n_cfgpreds(block);
	ir_loop *loop       = get_irn_loop(block);
	int      i;

	(void) data;

	for (i = 0; i < n_cfgpreds; ++i) {
		ir_node     *cfgpred_block = get_Block_cfgpred_block(block, i);
		ir_loop     *cfgpred_loop  = get_irn_loop(cfgpred_block);
		loop_edge_t *edge;
		bool        is_exit_edge;
		ir_loop     *l, *goal;

		/* same loop: no boundary crossed, nothing to record */
		if (cfgpred_loop == loop)
			continue;

		/* critical edges are splitted, so we can't jump from 1 loop
		 * directly into another without going out of it */
		assert(get_loop_depth(cfgpred_loop) != get_loop_depth(loop));

		/* edge out of loop: walk from the deeper loop up to the shallower */
		if (get_loop_depth(cfgpred_loop) > get_loop_depth(loop)) {
			is_exit_edge = true;
			l            = cfgpred_loop;
			goal         = loop;
		} else {
			is_exit_edge = false;
			l            = loop;
			goal         = cfgpred_loop;
		}

		/* this might be a jump out/in multiple loops: register the edge with
		 * each loop level in between */
		do {
			loop_info_t *l_info = get_loop_info(l);

			edge = obstack_alloc(&obst, sizeof(edge[0]));
			memset(edge, 0, sizeof(edge[0]));
			edge->block = block;
			edge->pos   = i;

			if (is_exit_edge) {
				edge->next         = l_info->exit_edges;
				l_info->exit_edges = edge;
				assert(get_loop_depth(loop) < get_loop_depth(l));
			} else {
				edge->next          = l_info->entry_edges;
				l_info->entry_edges = edge;
				assert(get_loop_depth(cfgpred_loop) < get_loop_depth(l));
			}
			l = get_loop_outer_loop(l);
		} while(l != goal);
	}
}
348
349
350
351
352 static void place_reload(worklist_entry_t *entry)
353 {
354         if (tentative_mode)
355                 return;
356
357         DB((dbg, LEVEL_1, "reload of %+F before %+F\n", entry->value,
358                 entry->reload_point));
359         assert(entry->reload_point != NULL);
360         be_add_reload(senv, entry->value, entry->reload_point, cls, 1);
361         entry->reload_point = NULL;
362 }
363
364 /**
365  * makes sure the worklist contains not more than n_regs - room_needed entries
366  */
367 static void make_room(worklist_t *worklist, size_t room_needed)
368 {
369         int               i;
370         int               spills_needed;
371         struct list_head *entry;
372
373         spills_needed = worklist->n_live_values + room_needed - n_regs;
374         if (spills_needed <= 0)
375                 return;
376
377         entry = worklist->live_values.next;
378         for(i = spills_needed; i > 0; --i) {
379                 struct list_head *next = entry->next;
380                 worklist_entry_t *wl_entry
381                         = list_entry(entry, worklist_entry_t, head);
382
383                 assert(worklist_contains(wl_entry->value));
384                 mark_irn_not_visited(wl_entry->value);
385                 place_reload(wl_entry);
386                 list_del(entry);
387
388                 entry = next;
389         }
390         worklist->n_live_values -= spills_needed;
391 }
392
/**
 * A value was used, so bring it to the back of the worklist (which might
 * result in a spill of another value).
 *
 * @param worklist     the active worklist
 * @param value        the value that was used
 * @param sched_point  node before which a reload would have to be placed
 */
static void val_used(worklist_t *worklist, ir_node *value, ir_node *sched_point)
{
	/* already in the worklist? move around, otherwise add at back */
	worklist_entry_t *entry = get_irn_link(value);

	assert(arch_irn_consider_in_reg_alloc(arch_env, cls, value));

	if (worklist_contains(value)) {
		assert(entry != NULL);

		list_del(&entry->head);
	} else {
		/* NOTE(review): a non-NULL link with the value unmarked is reused
		 * here — presumably left over from an earlier pass; verify */
		if (entry == NULL) {
			entry        = obstack_alloc(&obst, sizeof(entry[0]));
			memset(entry, 0, sizeof(entry[0]));

			entry->value = value;
			set_irn_link(value, entry);
		}

		++worklist->n_live_values;
		mark_irn_visited(value);
	}

	entry->reload_point = sched_point;
	list_add_tail(&entry->head, &worklist->live_values);
}
424
425 static void worklist_remove(worklist_t *worklist, ir_node *value)
426 {
427         worklist_entry_t *entry = get_irn_link(value);
428         assert(entry != NULL);
429         list_del(&entry->head);
430         --worklist->n_live_values;
431
432         assert(worklist_contains(value));
433         mark_irn_not_visited(value);
434 }
435
436 static void update_max_pressure(worklist_t *worklist)
437 {
438         if (worklist->n_live_values > max_register_pressure)
439                 max_register_pressure = worklist->n_live_values;
440 }
441
/**
 * Walk the schedule of @p block from back to front, maintaining the workset
 * and requesting spills/reloads whenever the register pressure would exceed
 * n_regs.
 */
static void do_spilling(ir_node *block, worklist_t *worklist)
{
	ir_node *node;

	assert(worklist != NULL);

	sched_foreach_reverse(block, node) {
		int    i, arity;
		size_t n_defs = 0;

		DB((dbg, LEVEL_2, "\t%+F... ", node));
		update_max_pressure(worklist);

		if (is_Phi(node)) {
			ir_node *node2;
			/* TODO: if we have some free registers, then we could decide to
			 * not spill some phis (but not for phis where at least 1 input is
			 * themselves) */

			/* we have to spill all phis that are not live */
			sched_foreach_reverse_from(node, node2) {
				assert(is_Phi(node2));

				if (worklist_contains(node2))
					continue;
				if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node2))
					continue;

				if (!tentative_mode)
					be_spill_phi(senv, node2);
			}
			DB((dbg, LEVEL_2, "\n"));
			/* phis sit at the block start, the reverse walk is done */
			break;
		}

		/* remove values defined by this instruction from the workset. Values
		 * defined but not in the workset need free registers */
		if (get_irn_mode(node) == mode_T) {
			const ir_edge_t *edge;

			foreach_out_edge(node, edge) {
				ir_node *proj = get_edge_src_irn(edge);
				if (!arch_irn_consider_in_reg_alloc(arch_env, cls, proj))
					continue;
				if (worklist_contains(proj)) {
					worklist_remove(worklist, proj);
				} else {
					++n_defs;
				}
			}
		} else if (arch_irn_consider_in_reg_alloc(arch_env, cls, node)) {
			if (worklist_contains(node)) {
				worklist_remove(worklist, node);
			} else {
				n_defs = 1;
			}
		}

		/* make sure we have enough free registers for the spills */
		make_room(worklist, n_defs);

		/* put all values used by the instruction into the workset */
		arity = get_irn_arity(node);
		for(i = 0; i < arity; ++i) {
			ir_node *use = get_irn_n(node, i);

			if (!arch_irn_consider_in_reg_alloc(arch_env, cls, use))
				continue;

			val_used(worklist, use, node);
		}

		/* we might have too many values in the worklist now and need to spill
		 * some */
		make_room(worklist, 0);

#ifdef DEBUG_libfirm
		print_worklist(worklist, LEVEL_2);
		DB((dbg, LEVEL_2, "\n"));
#endif
	}

	update_max_pressure(worklist);

#ifdef DEBUG_libfirm
	DB((dbg, LEVEL_1, "worklist at begin of %+F:", block));
	print_worklist(worklist, LEVEL_1);
	DB((dbg, LEVEL_1, "\n"));
#endif
}
532
533 static bool worklists_equal(const worklist_t *wl1, const worklist_t *wl2)
534 {
535         const struct list_head *i1 = &wl1->live_values;
536         const struct list_head *i2 = &wl2->live_values;
537
538         for ( ; i1 != &wl1->live_values && i2 != &wl2->live_values;
539                         i1 = i1->next, i2 = i2->next) {
540                 worklist_entry_t *entry1 = list_entry(i1, worklist_entry_t, head);
541                 worklist_entry_t *entry2 = list_entry(i2, worklist_entry_t, head);
542
543                 if (entry1->value != entry2->value)
544                         return false;
545         }
546         /* both lists at the end */
547         if (i1 != &wl1->live_values || i2 != &wl2->live_values)
548                 return false;
549
550         return true;
551 }
552
553 static bool fill_start_worklist(worklist_t *new_worklist, ir_node *block)
554 {
555         double           best_execfreq   = -1;
556         worklist_t      *best_worklist   = NULL;
557         ir_node         *best_succ_block = NULL;
558         int              best_pos;
559         const ir_edge_t *edge;
560
561         /* construct worklist */
562         foreach_block_succ(block, edge) {
563                 ir_node      *succ_block = get_edge_src_irn(edge);
564                 double       execfreq    = get_block_execfreq(exec_freq, succ_block);
565                 block_info_t *block_info;
566                 worklist_t   *succ_worklist;
567
568                 if (execfreq < best_execfreq)
569                         continue;
570
571                 block_info    = get_block_info(succ_block);
572                 succ_worklist = block_info->start_worklist;
573
574                 if (succ_worklist == NULL || succ_worklist->visited >= worklist_visited)
575                         continue;
576
577                 best_execfreq   = execfreq;
578                 best_worklist   = succ_worklist;
579                 best_succ_block = succ_block;
580                 best_pos        = get_edge_src_pos(edge);
581         }
582
583         if (best_worklist == NULL)
584                 return false;
585         best_worklist->visited = worklist_visited;
586
587         fill_and_activate_worklist(new_worklist, best_worklist, block,
588                         best_succ_block, best_pos);
589         return true;
590 }
591
592 static worklist_t *construct_start_worklist(ir_node *block)
593 {
594         worklist_t *worklist = new_worklist();
595
596         ++worklist_visited;
597
598         while(fill_start_worklist(worklist, block)) {
599                 if (worklist->n_live_values >= n_regs)
600                         break;
601         }
602
603         return worklist;
604 }
605
/**
 * Process a single block (block walker callback): construct its start
 * worklist from the successors' worklists, then run the spilling algorithm
 * backwards through the block.
 */
static void process_block(ir_node *block, void *env)
{
	block_info_t    *block_info;
	worklist_t      *worklist;
	int              n_preds;

	(void) env;
	DB((dbg, LEVEL_1, "Processing %+F\n", block));

	worklist   = construct_start_worklist(block);
	block_info = get_block_info(block);

#ifdef DEBUG_libfirm
	DB((dbg, LEVEL_1, "worklist at end of %+F:", block));
	print_worklist(worklist, LEVEL_1);
	DB((dbg, LEVEL_1, "\n"));

	if (should_have_reached_fixpoint) {
		assert(worklists_equal(block_info->end_worklist, worklist));
	}
#endif
	/* remember the workset at the block end */
	block_info->end_worklist = duplicate_worklist(worklist);

	do_spilling(block, worklist);
	deactivate_worklist(worklist);

#ifdef DEBUG_libfirm
	if (should_have_reached_fixpoint) {
		assert(worklists_equal(block_info->start_worklist, worklist));
	}
#endif
	/* do_spilling() transformed the end workset into the start workset */
	block_info->start_worklist = worklist;

	/* we shouldn't have any live values left at the start block */
	n_preds = get_Block_n_cfgpreds(block);
	//assert(n_preds != 0 || worklist->n_live_values == 0);
}
643
typedef struct block_or_loop_t block_or_loop_t;
/** Work item for the processing order: either a single block or a whole
 * (separately processed) loop. */
struct block_or_loop_t {
	union {
		ir_node *block; /**< valid if !is_loop */
		ir_loop *loop;  /**< valid if is_loop */
	} v;
	bool is_loop; /**< discriminator for the union above */
};

static block_or_loop_t *loop_blocks;  /**< work items collected for the current loop */
static ir_loop         *current_loop; /**< loop currently being processed */
655
656 static void find_blocks(ir_node *block);
657
/**
 * Add the sub-loop @p loop (entered via block @p entry) as a single work item
 * to loop_blocks and continue the search at all of its exits that lead back
 * into current_loop.
 */
static void find_in_loop(ir_loop *loop, ir_node *entry)
{
	loop_info_t     *loop_info = get_loop_info(loop);
	block_or_loop_t block_or_loop;
	loop_edge_t     *edge;

	/* simply mark 1 block in the loop to indicate that the loop was already
	 * processed */
	ir_node *some_block = loop_info->entry_edges->block;
	if (Block_block_visited(some_block))
		return;

	block_or_loop.v.loop  = loop;
	block_or_loop.is_loop = true;
	ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);

#ifndef NDEBUG
	{
		/* we should be 1 of the entry blocks */
		loop_edge_t *edge  = loop_info->entry_edges;
		bool         found = false;
		for ( ; edge != NULL; edge = edge->next) {
			if (edge->block == entry)
				found = true;
		}
		assert(found);
	}
#else
	(void) entry;
#endif

	/* check all loop successors */
	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
		ir_node *succ      = edge->block;
		ir_loop *succ_loop = get_irn_loop(succ);

		if (succ_loop == current_loop) {
			find_blocks(succ);
		} else {
			/* the exit leaves current_loop as well; the outer walk will
			 * handle that block */
			assert(get_loop_depth(succ_loop) < get_loop_depth(current_loop));
		}
	}
}
701
702 static void find_blocks(ir_node *block)
703 {
704         const ir_edge_t *edge;
705         block_or_loop_t block_or_loop;
706
707         if (Block_block_visited(block))
708                 return;
709
710         block_or_loop.v.block = block;
711         block_or_loop.is_loop = false;
712
713         ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);
714         mark_Block_block_visited(block);
715
716         foreach_block_succ(block, edge) {
717                 ir_node *succ = get_edge_src_irn(edge);
718
719                 /* is it part of the current loop? */
720                 ir_loop *loop = get_irn_loop(succ);
721                 if (loop != current_loop) {
722                         /* a sub-loop? */
723                         if (get_loop_depth(loop) > get_loop_depth(current_loop)) {
724                                 find_in_loop(loop, succ);
725                         } else {
726                                 /* parent loop: we're not interested in the block */
727                         }
728                 } else {
729                         find_blocks(succ);
730                 }
731         }
732 }
733
734 /**
735  * append an entry to a worklist. WARNING: The entry must not already be in the
736  * worklist.
737  */
738 static void worklist_append(worklist_t *worklist, ir_node *value,
739                             ir_node *reload_point,
740                                                         ir_loop *unused_livethrough_loop)
741 {
742         worklist_entry_t *entry     = obstack_alloc(&obst, sizeof(entry[0]));
743         memset(entry, 0, sizeof(entry[0]));
744
745 #ifdef EXPENSIVE_CHECKS
746         {
747                 struct list_head *entry;
748                 list_for_each(entry, &worklist->live_values) {
749                         worklist_entry_t *wl_entry
750                                 = list_entry(entry, worklist_entry_t, head);
751                         assert(wl_entry->value != value);
752                 }
753         }
754 #endif
755
756         entry->value                   = value;
757         entry->reload_point            = reload_point;
758         entry->unused_livethrough_loop = unused_livethrough_loop;
759         list_add_tail(&entry->head, &worklist->live_values);
760         ++worklist->n_live_values;
761         assert(worklist->n_live_values <= n_regs);
762 }
763
/**
 * Register @p value as unused livethrough of the loop described by
 * @p loop_info: append it to the end worksets of the blocks before all loop
 * exits and to the start worksets of all loop entry blocks.
 */
static void push_unused_livethrough(loop_info_t *loop_info, ir_node *value)
{
	loop_edge_t *edge;
	++worklist_visited;

	/* add the value to all loop exit and entry blocks */
	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
		ir_node            *block
			= get_Block_cfgpred_block(edge->block, edge->pos);
		const block_info_t *info     = get_block_info(block);
		worklist_t         *worklist = info->end_worklist;
		ir_node            *reload_point = NULL;

		/* the visited generation guards against double-appending */
		if (worklist->visited >= worklist_visited)
			continue;
		worklist->visited = worklist_visited;

		/* TODO: we need a smarter mechanism here, that makes the reloader place
		 * reload nodes on all loop exits... */

		worklist_append(worklist, value, reload_point, loop_info->loop);
	}
	edge = loop_info->entry_edges;
	for ( ; edge != NULL; edge = edge->next) {
		ir_node            *entry_block = edge->block;
		const block_info_t *info        = get_block_info(entry_block);
		worklist_t         *worklist    = info->start_worklist;
		ir_node            *pred_block;
		ir_node            *reload_point;

		if (worklist->visited >= worklist_visited)
			continue;
		worklist->visited = worklist_visited;

		/* reload at the end of the predecessor block outside the loop */
		pred_block   = get_Block_cfgpred_block(entry_block, edge->pos);
		reload_point = be_get_end_of_block_insertion_point(pred_block);

		worklist_append(worklist, value, reload_point, loop_info->loop);
	}

	set_irn_link(value, NULL);
	++loop_info->max_register_pressure;
}
807
/**
 * Find all unused livethrough values of the loop described by @p loop_info
 * and push them into the loop's boundary worksets, as long as the loop's
 * register pressure permits.
 */
static void push_unused_livethroughs(loop_info_t *loop_info)
{
	loop_edge_t *edge;

	/* we can only push unused livethroughs if register pressure inside the loop
	 * was low enough */
	if (loop_info->max_register_pressure >= n_regs)
		return;

	/* find unused livethroughs: register pressure in the loop was low enough
	 * which means that we had no spills which implies that at every point in
	 * the loop all live values fit into registers */
	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
		ir_node            *block          = edge->block;
		const block_info_t *info           = get_block_info(block);
		worklist_t         *start_worklist = info->start_worklist;
		ir_node            *exit_block;
		const block_info_t *exit_info;
		worklist_t         *end_worklist;
		struct list_head   *entry;

		if (start_worklist == NULL)
			continue;

		exit_block   = get_Block_cfgpred_block(edge->block, edge->pos);
		exit_info    = get_block_info(exit_block);
		end_worklist = exit_info->end_worklist;

		activate_worklist(end_worklist);
		/* all values contained in the start_worklist, which are not available
		 * in the end_worklist, must be unused livethroughs */

		list_for_each(entry, &start_worklist->live_values) {
			worklist_entry_t *wl_entry
				= list_entry(entry, worklist_entry_t, head);
			ir_node *value = wl_entry->value;

			/* pushing increases the pressure; stop at the register limit */
			if (loop_info->max_register_pressure >= n_regs)
				break;


			if (!worklist_contains(value)) {
				/* add it to all loop exits */
				DB((dbg, LEVEL_2, "Found unused livethrough: %+F (regpressure in loop %d)\n", value, loop_info->max_register_pressure));
				push_unused_livethrough(loop_info, value);
				/* the value should now be part of end_worklist, so mark it */
				mark_irn_visited(value);
			}
		}

		deactivate_worklist(end_worklist);
	}
}
861
862 static void process_block_or_loop(const block_or_loop_t *block_or_loop)
863 {
864         if (block_or_loop->is_loop) {
865                 loop_info_t *loop_info = get_loop_info(block_or_loop->v.loop);
866
867                 if (do_push_unused_livethroughs)
868                         push_unused_livethroughs(loop_info);
869
870                 if (loop_info->max_register_pressure > max_register_pressure)
871                         max_register_pressure = loop_info->max_register_pressure;
872
873                 return;
874         }
875         process_block(block_or_loop->v.block, NULL);
876 }
877
878 static void process_loop(ir_loop *loop)
879 {
880         int         n_elements = get_loop_n_elements(loop);
881         int         i, len;
882         loop_info_t *loop_info;
883         loop_edge_t *edge;
884         ir_node     *some_block;
885
886         /* first handle all sub-loops */
887         for (i = 0; i < n_elements; ++i) {
888                 loop_element element = get_loop_element(loop, i);
889                 if (*element.kind != k_ir_loop)
890                         continue;
891
892                 process_loop(element.son);
893         }
894
895         /* create a postorder of the blocks */
896         loop_info = get_loop_info(loop);
897         edge      = loop_info->entry_edges;
898         if (edge != NULL) {
899                 some_block = edge->block;
900         } else {
901                 assert(loop == get_irg_loop(current_ir_graph));
902                 some_block = get_irg_start_block(current_ir_graph);
903         }
904
905         loop_blocks  = NEW_ARR_F(block_or_loop_t,0);
906         current_loop = loop;
907
908         ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);
909         inc_irg_block_visited(current_ir_graph);
910         find_blocks(some_block);
911         /* for endless loops the end-block might be unreachable */
912         if (loop == get_irg_loop(current_ir_graph)) {
913                 find_blocks(get_irg_end_block(current_ir_graph));
914         }
915         ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);
916
917         DB((dbg, LEVEL_3, "Block List for loop %p\n", loop));
918         len = ARR_LEN(loop_blocks);
919         for (i = 0; i < len; ++i) {
920                 block_or_loop_t *block_or_loop = &loop_blocks[i];
921                 if (block_or_loop->is_loop) {
922                         DB((dbg, LEVEL_3, " L-%p", block_or_loop->v.loop));
923                 } else {
924                         DB((dbg, LEVEL_3, " B-%+F", block_or_loop->v.block));
925                 }
926         }
927         DB((dbg, LEVEL_3, "\n"));
928
929         max_register_pressure = 0;
930
931         /* run1: tentative phase */
932         tentative_mode = true;
933         for (i = len-1; i >= 0; --i) {
934                 process_block_or_loop(&loop_blocks[i]);
935         }
936
937         /* run2: tentative phase - should reach fixpoint */
938         tentative_mode = true;
939         for (i = len-1; i >= 0; --i) {
940                 process_block_or_loop(&loop_blocks[i]);
941         }
942
943 #ifndef NDEBUG
944         /* run3: tentative phase - check fixpoint */
945         tentative_mode               = true;
946         should_have_reached_fixpoint = true;
947         for (i = len-1; i >= 0; --i) {
948                 process_block_or_loop(&loop_blocks[i]);
949         }
950         should_have_reached_fixpoint = false;
951 #endif
952
953         /* run4: add spills/reloads */
954         tentative_mode              = false;
955         do_push_unused_livethroughs = true;
956         for (i = len-1; i >= 0; --i) {
957                 process_block_or_loop(&loop_blocks[i]);
958         }
959         do_push_unused_livethroughs = false;
960
961         loop_info->max_register_pressure = max_register_pressure;
962         DB((dbg, LEVEL_2, "Regpressure in loop %p: %u\n", loop,
963                                 (unsigned) max_register_pressure));
964
965         DEL_ARR_F(loop_blocks);
966 }
967
968 static void fix_block_borders(ir_node *block, void *data)
969 {
970         block_info_t *block_info     = get_block_info(block);
971         worklist_t   *start_worklist = block_info->start_worklist;
972         int           n_cfgpreds     = get_Block_n_cfgpreds(block);
973         ir_loop      *loop           = get_irn_loop(block);
974         int           i;
975
976         (void) data;
977         assert(start_worklist != NULL);
978
979         for (i = 0; i < n_cfgpreds; ++i) {
980                 ir_node      *pred_block      = get_Block_cfgpred_block(block, i);
981                 block_info_t *pred_block_info = get_block_info(pred_block);
982                 worklist_t   *end_worklist    = pred_block_info->end_worklist;
983                 ir_loop      *pred_loop       = get_irn_loop(pred_block);
984                 bool          is_loop_entry   = false;
985                 struct list_head *entry;
986
987                 assert(end_worklist != NULL);
988
989                 if (get_loop_depth(pred_loop) < get_loop_depth(loop)) {
990                         is_loop_entry = true;
991                 }
992
993                 /* reload missing values */
994                 activate_worklist(end_worklist);
995
996                 list_for_each(entry, &start_worklist->live_values) {
997                         worklist_entry_t *wl_entry
998                                 = list_entry(entry, worklist_entry_t, head);
999                         ir_node          *value = wl_entry->value;
1000
1001                         if (is_Phi(value) && get_nodes_block(value) == block) {
1002                                 value = get_irn_n(value, i);
1003
1004                                 /* we might have unknowns as argument for the phi */
1005                                 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, value))
1006                                         continue;
1007                         }
1008
1009                         if (worklist_contains(value))
1010                                 continue;
1011                         if (wl_entry->unused_livethrough_loop != NULL && !is_loop_entry)
1012                                 continue;
1013
1014                         be_add_reload_on_edge(senv, value, block, i, cls, 1);
1015                 }
1016
1017                 deactivate_worklist(end_worklist);
1018         }
1019 }
1020
/**
 * Main entry point of the belady3 spiller: run the spilling algorithm for
 * one register class of the given backend graph, then materialize the
 * collected spills/reloads.
 */
static void be_spill_belady3(be_irg_t *birg, const arch_register_class_t *ncls)
{
	ir_graph *irg = be_get_birg_irg(birg);

	cls    = ncls;
	/* usable registers = class size minus the ignore registers */
	n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);

	/* shortcut for register classes with ignore regs only */
	if (n_regs == 0)
		return;

	worklist_visited = 0;
	arch_env         = be_get_birg_arch_env(birg);
	exec_freq        = be_get_birg_exec_freq(birg);

	be_clear_links(irg);
	ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
			| IR_RESOURCE_LOOP_LINK);
	inc_irg_visited(irg);

	/* obstack holds the per-block/per-loop info allocated during the run */
	obstack_init(&obst);
	senv = be_new_spill_env(birg);

	/* compute the loop tree and collect loop entry/exit edges */
	assure_cf_loop(irg);
	clear_loop_info(get_irg_loop(irg));
	irg_block_walk_graph(irg, construct_loop_edges, NULL, NULL);

	/* process the outermost "loop", i.e. the whole graph
	 * NOTE(review): uses current_ir_graph here while the rest of the
	 * function uses irg — presumably the two are identical at this point;
	 * confirm before relying on it */
	process_loop(get_irg_loop(current_ir_graph));

	/* insert the reloads required on block borders */
	irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);

	ir_free_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
			| IR_RESOURCE_LOOP_LINK);

	be_insert_spills_reloads(senv);

	obstack_free(&obst, NULL);

	/* clean up */
	be_delete_spill_env(senv);
}
1062
1063 void be_init_spillbelady3(void)
1064 {
1065         static be_spiller_t belady3_spiller = {
1066                 be_spill_belady3
1067         };
1068
1069         be_register_spiller("belady3", &belady3_spiller);
1070         FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady3");
1071 }
1072
1073 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady3);