cleanup and fix permutate_values (3-cycles and values being used 2 times with 1 being...
[libfirm] / ir / be / bespillbelady3.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       MIN-algorithm with some twists (aka belady spiller v3)
23  * @author      Matthias Braun
24  * @date        15.01.2008
25  * @version     $Id$
26  *
27  * TODO:
28  *   - handle phis correctly, decide whether we should spill them ~ok
29  *   - merge multiple start worksets of blocks ~ok
30  *   - how to and when to do the tentative phase...
31  */
32 #include "config.h"
33
34 #include <stdbool.h>
35
36 #include "debug.h"
37 #include "list.h"
38 #include "pdeq.h"
39
40 #include "irnode_t.h"
41 #include "irprintf.h"
42 #include "iredges_t.h"
43 #include "irloop_t.h"
44 #include "irgwalk.h"
45 #include "execfreq.h"
46
47 #include "bemodule.h"
48 #include "bespill.h"
49 #include "beutil.h"
50 #include "bespillutil.h"
51 #include "besched.h"
52 #include "be_t.h"
53
54 #ifndef NDEBUG
55 #define EXPENSIVE_CHECKS
56 #endif
57
/** Debug module handle (debug builds only). */
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

typedef struct loop_edge_t loop_edge_t;
/** One control-flow edge crossing a loop border (entry or exit). */
struct loop_edge_t {
	ir_node     *block; /**< block at the target end of the edge */
	int          pos;   /**< predecessor position of the edge in block */
	loop_edge_t *next;  /**< next edge in a singly linked list */
};

typedef struct loop_info_t loop_info_t;
/** Per-loop bookkeeping; attached to the loop's link field. */
struct loop_info_t {
	ir_loop     *loop;        /**< the loop this info belongs to */
	loop_edge_t *entry_edges; /**< edges entering the loop */
	loop_edge_t *exit_edges;  /**< edges leaving the loop */
	size_t       max_register_pressure; /**< maximum pressure observed in the loop */
};

typedef struct worklist_entry_t worklist_entry_t;
/** One live value tracked in a worklist. */
struct worklist_entry_t {
	struct list_head  head;         /**< list handle within the worklist */
	ir_node          *value;        /**< the tracked value */
	ir_node          *reload_point; /**< node before which a reload would be placed */
	ir_loop          *unused_livethrough_loop; /**< loop for which the value is an
	                                                unused livethrough, or NULL */
};

/** Iterate over all worklist_entry_t of a worklist. */
#define foreach_worklist(entry, wl) list_for_each_entry(worklist_entry_t, entry, &(wl)->live_values, head)

typedef struct worklist_t worklist_t;
/** Ordered list of currently live values (the belady workset). */
struct worklist_t {
	struct list_head  live_values;   /**< list of worklist_entry_t */
	size_t            n_live_values; /**< number of entries in live_values */
	ir_visited_t      visited;       /**< timestamp to avoid processing a worklist twice */
};

typedef struct block_info_t block_info_t;
/** Worklists computed at the borders of a block. */
struct block_info_t {
	worklist_t *start_worklist; /**< values live at the block start */
	worklist_t *end_worklist;   /**< values live at the block end */
};

/** The register class we currently run on. */
static const arch_register_class_t *cls;
static struct obstack               obst;   /**< obstack for all allocations of this phase */
static spill_env_t                 *senv;   /**< spill environment used to place spills/reloads */
static size_t                       n_regs; /**< number of registers in cls */
static size_t                       max_register_pressure;
static bool                         tentative_mode; /**< if set, no spills/reloads are materialized */
static bool                         do_push_unused_livethroughs;
/** Execution frequency for the current graph. */
static ir_exec_freq                *exec_freq;
static ir_visited_t                 worklist_visited; /**< timestamp counter for worklist_t::visited */
#ifdef DEBUG_libfirm
static bool                         should_have_reached_fixpoint;
#endif
112
113 static worklist_t *new_worklist(void)
114 {
115         worklist_t *worklist = obstack_alloc(&obst, sizeof(worklist[0]));
116         memset(worklist, 0, sizeof(worklist[0]));
117
118         INIT_LIST_HEAD(&worklist->live_values);
119         worklist->n_live_values    = 0;
120
121         return worklist;
122 }
123
/** Clear the visited mark of @p node by setting its visited counter to one
 * less than the current graph visited counter. */
static void mark_irn_not_visited(ir_node *node)
{
	set_irn_visited(node, get_irg_visited(current_ir_graph) - 1);
}
128
/** A value is part of the currently activated worklist iff its visited flag
 * is set (see activate_worklist()/deactivate_worklist()). */
static bool worklist_contains(const ir_node *node)
{
	return irn_visited(node);
}
133
134 static block_info_t *get_block_info(ir_node *block)
135 {
136         block_info_t *info = get_irn_link(block);
137         if (info != NULL)
138                 return info;
139
140         info = obstack_alloc(&obst, sizeof(info[0]));
141         memset(info, 0, sizeof(info[0]));
142         set_irn_link(block, info);
143         return info;
144 }
145
146 static void deactivate_worklist(const worklist_t *worklist)
147 {
148         worklist_entry_t *entry;
149
150         foreach_worklist(entry, worklist) {
151                 assert(worklist_contains(entry->value));
152                 mark_irn_not_visited(entry->value);
153                 set_irn_link(entry->value, NULL);
154         }
155 }
156
157 static void activate_worklist(const worklist_t *worklist)
158 {
159         worklist_entry_t *entry;
160
161         foreach_worklist(entry, worklist) {
162                 assert(!worklist_contains(entry->value));
163                 mark_irn_visited(entry->value);
164                 set_irn_link(entry->value, entry);
165         }
166 }
167
168 /**
169  * duplicate a worklist and directly activates it
170  */
171 static void fill_and_activate_worklist(worklist_t *new_worklist,
172                 const worklist_t *worklist, ir_node *block, ir_node *succ_block,
173                 int succ_pos)
174 {
175         ir_node          *reload_point  = NULL;
176         size_t            n_live_values = 0;
177         worklist_entry_t *entry;
178
179         if (succ_block != NULL &&
180                         (get_Block_n_cfgpreds(succ_block) > 1
181                          || get_irn_n_edges_kind(block, EDGE_KIND_BLOCK) > 1)) {
182                 reload_point = be_get_end_of_block_insertion_point(block);
183         }
184
185         foreach_worklist(entry, worklist) {
186                 ir_node          *value = entry->value;
187                 worklist_entry_t *new_entry;
188
189                 if (new_worklist->n_live_values >= n_regs)
190                         break;
191
192                 if (is_Phi(value) && get_nodes_block(value) == succ_block) {
193                         value = get_Phi_pred(value, succ_pos);
194
195                         /* can happen for unknown phi preds */
196                         if (!arch_irn_consider_in_reg_alloc(cls, value))
197                                 continue;
198                 }
199
200                 if (irn_visited_else_mark(value))
201                         continue;
202
203                 new_entry = obstack_alloc(&obst, sizeof(new_entry[0]));
204                 memset(new_entry, 0, sizeof(new_entry[0]));
205
206                 new_entry->value = value;
207                 if (reload_point != NULL) {
208                         new_entry->reload_point = reload_point;
209                 } else {
210                         new_entry->reload_point = entry->reload_point;
211                 }
212
213                 list_add_tail(&new_entry->head, &new_worklist->live_values);
214                 ++n_live_values;
215
216                 set_irn_link(value, new_entry);
217                 new_worklist->n_live_values++;
218         }
219 }
220
221 static worklist_t *duplicate_worklist(const worklist_t *worklist)
222 {
223         worklist_t       *new_worklist;
224         struct list_head *entry;
225
226         new_worklist = obstack_alloc(&obst, sizeof(new_worklist[0]));
227         memset(new_worklist, 0, sizeof(new_worklist[0]));
228         INIT_LIST_HEAD(&new_worklist->live_values);
229         new_worklist->n_live_values = worklist->n_live_values;
230
231         list_for_each(entry, &worklist->live_values) {
232                 worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
233                 worklist_entry_t *new_entry
234                         = obstack_alloc(&obst, sizeof(new_entry[0]));
235
236                 memcpy(new_entry, wl_entry, sizeof(new_entry[0]));
237                 list_add_tail(&new_entry->head, &new_worklist->live_values);
238         }
239
240         return new_worklist;
241 }
242
243 #ifdef DEBUG_libfirm
244 static void print_worklist(const worklist_t *worklist, int level)
245 {
246         struct list_head *entry;
247
248         DB((dbg, level, "%d values: ", worklist->n_live_values));
249         list_for_each(entry, &worklist->live_values) {
250                 worklist_entry_t *wl_entry
251                         = list_entry(entry, worklist_entry_t, head);
252
253                 DB((dbg, level, "%+F ", wl_entry->value));
254         }
255 }
256 #endif
257
258
259 /**
260  * Clear the link fields of a loop and all
261  * its child loops.
262  *
263  * @param loop  the root loop
264  */
265 static void clear_loop_info(ir_loop *loop)
266 {
267         int n_elements = get_loop_n_elements(loop);
268         int i;
269
270         loop->link = NULL;
271         for (i = 0; i < n_elements; ++i) {
272                 loop_element element = get_loop_element(loop, i);
273                 if (*element.kind != k_ir_loop)
274                         continue;
275
276                 clear_loop_info(element.son);
277         }
278 }
279
280 static loop_info_t *get_loop_info(ir_loop *loop)
281 {
282         loop_info_t *info = loop->link;
283         if (info != NULL)
284                 return info;
285
286         info = obstack_alloc(&obst, sizeof(info[0]));
287         memset(info, 0, sizeof(info[0]));
288         info->loop = loop;
289         loop->link = info;
290         return info;
291 }
292
293 /**
294  * constructs loop in+out edges when called in a block walker
295  */
static void construct_loop_edges(ir_node *block, void *data)
{
	int      n_cfgpreds = get_Block_n_cfgpreds(block);
	ir_loop *loop       = get_irn_loop(block);
	int      i;

	(void) data;

	/* look at every incoming control-flow edge of the block */
	for (i = 0; i < n_cfgpreds; ++i) {
		ir_node     *cfgpred_block = get_Block_cfgpred_block(block, i);
		ir_loop     *cfgpred_loop  = get_irn_loop(cfgpred_block);
		loop_edge_t *edge;
		bool        is_exit_edge;
		ir_loop     *l, *goal;

		/* edge stays inside the same loop: not interesting */
		if (cfgpred_loop == loop)
			continue;

		/* critical edges are splitted, so we can't jump from 1 loop
		 * directly into another without going out of it */
		assert(get_loop_depth(cfgpred_loop) != get_loop_depth(loop));

		/* edge out of loop */
		if (get_loop_depth(cfgpred_loop) > get_loop_depth(loop)) {
			is_exit_edge = true;
			l            = cfgpred_loop;
			goal         = loop;
		} else {
			is_exit_edge = false;
			l            = loop;
			goal         = cfgpred_loop;
		}

		/* this might be a jump out/in multiple loops: record the edge in
		 * every loop crossed on the way from the inner loop l out to goal */
		do {
			loop_info_t *l_info = get_loop_info(l);

			edge = obstack_alloc(&obst, sizeof(edge[0]));
			memset(edge, 0, sizeof(edge[0]));
			edge->block = block;
			edge->pos   = i;

			if (is_exit_edge) {
				/* prepend to the exit-edge list of the crossed loop */
				edge->next         = l_info->exit_edges;
				l_info->exit_edges = edge;
				assert(get_loop_depth(loop) < get_loop_depth(l));
			} else {
				/* prepend to the entry-edge list of the crossed loop */
				edge->next          = l_info->entry_edges;
				l_info->entry_edges = edge;
				assert(get_loop_depth(cfgpred_loop) < get_loop_depth(l));
			}
			l = get_loop_outer_loop(l);
		} while (l != goal);
	}
}
351
352
353
354
/**
 * Place a reload of the value of @p entry before its recorded reload point
 * and clear the reload point. In tentative mode nothing is materialized.
 */
static void place_reload(worklist_entry_t *entry)
{
	if (tentative_mode)
		return;

	DB((dbg, LEVEL_1, "reload of %+F before %+F\n", entry->value,
		entry->reload_point));
	assert(entry->reload_point != NULL);
	be_add_reload(senv, entry->value, entry->reload_point, cls, 1);
	entry->reload_point = NULL;
}
366
367 /**
368  * makes sure the worklist contains not more than n_regs - room_needed entries
369  */
370 static void make_room(worklist_t *worklist, size_t room_needed)
371 {
372         int               i;
373         int               spills_needed;
374         struct list_head *entry;
375
376         spills_needed = worklist->n_live_values + room_needed - n_regs;
377         if (spills_needed <= 0)
378                 return;
379
380         entry = worklist->live_values.next;
381         for(i = spills_needed; i > 0; --i) {
382                 struct list_head *next = entry->next;
383                 worklist_entry_t *wl_entry
384                         = list_entry(entry, worklist_entry_t, head);
385
386                 assert(worklist_contains(wl_entry->value));
387                 mark_irn_not_visited(wl_entry->value);
388                 place_reload(wl_entry);
389                 list_del(entry);
390
391                 entry = next;
392         }
393         worklist->n_live_values -= spills_needed;
394 }
395
/**
 * A value was used, so bring it to the back of the worklist (which might
 * result in a spill of another value later, see make_room()).
 *
 * @param worklist     the current worklist
 * @param value        the used value
 * @param sched_point  the node before which a reload would have to be placed
 */
static void val_used(worklist_t *worklist, ir_node *value, ir_node *sched_point)
{
	/* already in the worklist? move around, otherwise add at back */
	worklist_entry_t *entry = get_irn_link(value);

	assert(arch_irn_consider_in_reg_alloc(cls, value));

	if (worklist_contains(value)) {
		/* live already: just unlink so we can re-append at the back */
		assert(entry != NULL);

		list_del(&entry->head);
	} else {
		/* not live: reuse an existing entry from a previous activation if
		 * the link still points at one, otherwise allocate a new entry */
		if (entry == NULL) {
			entry        = obstack_alloc(&obst, sizeof(entry[0]));
			memset(entry, 0, sizeof(entry[0]));

			entry->value = value;
			set_irn_link(value, entry);
		}

		++worklist->n_live_values;
		mark_irn_visited(value);
	}

	/* record the latest use as the reload point and move to the back */
	entry->reload_point = sched_point;
	list_add_tail(&entry->head, &worklist->live_values);
}
427
428 static void worklist_remove(worklist_t *worklist, ir_node *value)
429 {
430         worklist_entry_t *entry = get_irn_link(value);
431         assert(entry != NULL);
432         list_del(&entry->head);
433         --worklist->n_live_values;
434
435         assert(worklist_contains(value));
436         mark_irn_not_visited(value);
437 }
438
439 static void update_max_pressure(worklist_t *worklist)
440 {
441         if (worklist->n_live_values > max_register_pressure)
442                 max_register_pressure = worklist->n_live_values;
443 }
444
/**
 * Walk the schedule of @p block backwards and perform the belady decisions:
 * @p worklist holds the values live at the block end on entry and holds the
 * values live at the block start when the function returns.
 */
static void do_spilling(ir_node *block, worklist_t *worklist)
{
	ir_node *node;

	assert(worklist != NULL);

	sched_foreach_reverse(block, node) {
		int    i, arity;
		size_t n_defs = 0;

		DB((dbg, LEVEL_2, "\t%+F... ", node));
		update_max_pressure(worklist);

		if (is_Phi(node)) {
			ir_node *node2;
			/* TODO: if we have some free registers, then we could decide to
			 * not spill some phis (but not for phis where at least 1 input is
			 * themselfes) */

			/* we have to spill all phis that are not live */
			sched_foreach_reverse_from(node, node2) {
				assert(is_Phi(node2));

				if (worklist_contains(node2))
					continue;
				if (!arch_irn_consider_in_reg_alloc(cls, node2))
					continue;

				if (!tentative_mode)
					be_spill_phi(senv, node2);
			}
			DB((dbg, LEVEL_2, "\n"));
			/* phis are at the start of the schedule; we are done */
			break;
		}

		/* remove values defined by this instruction from the workset. Values
		 * defined but not in the workset need free registers */
		if (get_irn_mode(node) == mode_T) {
			/* multi-result node: look at the Proj users */
			const ir_edge_t *edge;

			foreach_out_edge(node, edge) {
				ir_node *proj = get_edge_src_irn(edge);
				if (!arch_irn_consider_in_reg_alloc(cls, proj))
					continue;
				if (worklist_contains(proj)) {
					worklist_remove(worklist, proj);
				} else {
					++n_defs;
				}
			}
		} else if (arch_irn_consider_in_reg_alloc(cls, node)) {
			if (worklist_contains(node)) {
				worklist_remove(worklist, node);
			} else {
				n_defs = 1;
			}
		}

		/* make sure we have enough free registers for the spills */
		make_room(worklist, n_defs);

		/* put all values used by the instruction into the workset */
		arity = get_irn_arity(node);
		for(i = 0; i < arity; ++i) {
			ir_node *use = get_irn_n(node, i);

			if (!arch_irn_consider_in_reg_alloc(cls, use))
				continue;

			val_used(worklist, use, node);
		}

		/* we might have too many values in the worklist now and need to spill
		 * some */
		make_room(worklist, 0);

#ifdef DEBUG_libfirm
		print_worklist(worklist, LEVEL_2);
		DB((dbg, LEVEL_2, "\n"));
#endif
	}

	update_max_pressure(worklist);

#ifdef DEBUG_libfirm
	DB((dbg, LEVEL_1, "worklist at begin of %+F:", block));
	print_worklist(worklist, LEVEL_1);
	DB((dbg, LEVEL_1, "\n"));
#endif
}
535
536 static bool worklists_equal(const worklist_t *wl1, const worklist_t *wl2)
537 {
538         const struct list_head *i1 = &wl1->live_values;
539         const struct list_head *i2 = &wl2->live_values;
540
541         for ( ; i1 != &wl1->live_values && i2 != &wl2->live_values;
542                         i1 = i1->next, i2 = i2->next) {
543                 worklist_entry_t *entry1 = list_entry(i1, worklist_entry_t, head);
544                 worklist_entry_t *entry2 = list_entry(i2, worklist_entry_t, head);
545
546                 if (entry1->value != entry2->value)
547                         return false;
548         }
549         /* both lists at the end */
550         if (i1 != &wl1->live_values || i2 != &wl2->live_values)
551                 return false;
552
553         return true;
554 }
555
/**
 * Pick the not-yet-visited successor block with the highest execution
 * frequency that already has a start worklist, and merge its values into
 * @p new_worklist (activating them).
 *
 * @return true if a successor worklist was merged, false if none is left
 */
static bool fill_start_worklist(worklist_t *new_worklist, ir_node *block)
{
	double           best_execfreq   = -1;
	worklist_t      *best_worklist   = NULL;
	ir_node         *best_succ_block = NULL;
	int              best_pos        = -1;
	const ir_edge_t *edge;

	/* construct worklist */
	foreach_block_succ(block, edge) {
		ir_node      *succ_block = get_edge_src_irn(edge);
		double       execfreq    = get_block_execfreq(exec_freq, succ_block);
		block_info_t *block_info;
		worklist_t   *succ_worklist;

		if (execfreq < best_execfreq)
			continue;

		block_info    = get_block_info(succ_block);
		succ_worklist = block_info->start_worklist;

		/* skip successors without a worklist or already merged in this
		 * round (visited timestamp is current) */
		if (succ_worklist == NULL || succ_worklist->visited >= worklist_visited)
			continue;

		best_execfreq   = execfreq;
		best_worklist   = succ_worklist;
		best_succ_block = succ_block;
		best_pos        = get_edge_src_pos(edge);
	}

	if (best_worklist == NULL)
		return false;
	best_worklist->visited = worklist_visited;

	fill_and_activate_worklist(new_worklist, best_worklist, block,
			best_succ_block, best_pos);
	return true;
}
594
595 static worklist_t *construct_start_worklist(ir_node *block)
596 {
597         worklist_t *worklist = new_worklist();
598
599         ++worklist_visited;
600
601         while(fill_start_worklist(worklist, block)) {
602                 if (worklist->n_live_values >= n_regs)
603                         break;
604         }
605
606         return worklist;
607 }
608
/**
 * Block walker: run the spilling algorithm on one block. The end worklist
 * is built from the successors' start worklists, a copy is stored as the
 * block's end worklist, and do_spilling() then turns the worklist into the
 * block's start worklist.
 */
static void process_block(ir_node *block, void *env)
{
	block_info_t    *block_info;
	worklist_t      *worklist;
	int              n_preds;

	(void) env;
	DB((dbg, LEVEL_1, "Processing %+F\n", block));

	worklist   = construct_start_worklist(block);
	block_info = get_block_info(block);

#ifdef DEBUG_libfirm
	DB((dbg, LEVEL_1, "worklist at end of %+F:", block));
	print_worklist(worklist, LEVEL_1);
	DB((dbg, LEVEL_1, "\n"));

	if (should_have_reached_fixpoint) {
		assert(worklists_equal(block_info->end_worklist, worklist));
	}
#endif
	/* keep a copy: do_spilling destructively updates the worklist */
	block_info->end_worklist = duplicate_worklist(worklist);

	do_spilling(block, worklist);
	deactivate_worklist(worklist);

#ifdef DEBUG_libfirm
	if (should_have_reached_fixpoint) {
		assert(worklists_equal(block_info->start_worklist, worklist));
	}
#endif
	block_info->start_worklist = worklist;

	/* we shouldn't have any live values left at the start block */
	n_preds = get_Block_n_cfgpreds(block);
	//assert(n_preds != 0 || worklist->n_live_values == 0);
}
646
typedef union block_or_loop_t block_or_loop_t;
/** Either a block or a loop; elements of the mixed post-order built by
 * find_blocks()/find_in_loop(). The active member is determined by reading
 * the firm kind field (see process_block_or_loop()). */
union block_or_loop_t {
	ir_node *block;
	ir_loop *loop;
};

/* dynamic array of blocks/loops collected by find_blocks() */
static block_or_loop_t *loop_blocks;
/* the loop whose blocks are currently being collected */
static ir_loop         *current_loop;
655
656 static void find_blocks(ir_node *block);
657
/**
 * Append the sub-loop @p loop (entered through block @p entry) to
 * loop_blocks as a single element and continue collecting at its exits that
 * lead back into current_loop.
 */
static void find_in_loop(ir_loop *loop, ir_node *entry)
{
	loop_info_t     *loop_info = get_loop_info(loop);
	block_or_loop_t block_or_loop;
	loop_edge_t     *edge;

	/* simply mark 1 block in the loop to indicate that the loop was already
	 * processed */
	ir_node *some_block = loop_info->entry_edges->block;
	if (Block_block_visited(some_block))
		return;

	block_or_loop.loop = loop;
	ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);

#ifndef NDEBUG
	{
		/* we should be 1 of the entry blocks */
		loop_edge_t *edge  = loop_info->entry_edges;
		bool         found = false;
		for ( ; edge != NULL; edge = edge->next) {
			if (edge->block == entry) {
				found = true;
				break;
			}
		}
		assert(found);
	}
#else
	(void) entry;
#endif

	/* check all loop successors */
	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
		ir_node *succ      = edge->block;
		ir_loop *succ_loop = get_irn_loop(succ);

		if (succ_loop == current_loop) {
			find_blocks(succ);
		} else {
			/* exits into an outer loop are handled when that loop is
			 * processed */
			assert(get_loop_depth(succ_loop) < get_loop_depth(current_loop));
		}
	}
}
702
/**
 * Depth-first collection of the blocks of current_loop into loop_blocks;
 * sub-loops are descended into via find_in_loop() and appear as single
 * loop elements in the array.
 */
static void find_blocks(ir_node *block)
{
	const ir_edge_t *edge;
	block_or_loop_t block_or_loop;

	if (Block_block_visited(block))
		return;

	block_or_loop.block = block;

	ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);
	mark_Block_block_visited(block);

	foreach_block_succ(block, edge) {
		ir_node *succ = get_edge_src_irn(edge);

		/* is it part of the current loop? */
		ir_loop *loop = get_irn_loop(succ);
		if (loop != current_loop) {
			/* a sub-loop? */
			if (get_loop_depth(loop) > get_loop_depth(current_loop)) {
				find_in_loop(loop, succ);
			} else {
				/* parent loop: we're not interested in the block */
			}
		} else {
			find_blocks(succ);
		}
	}
}
733
734 /**
735  * append an entry to a worklist. WARNING: The entry must not already be in the
736  * worklist.
737  */
738 static void worklist_append(worklist_t *worklist, ir_node *value,
739                             ir_node *reload_point,
740                                                         ir_loop *unused_livethrough_loop)
741 {
742         worklist_entry_t *entry;
743
744 #ifdef EXPENSIVE_CHECKS
745         foreach_worklist(entry, worklist) {
746                 assert(entry->value != value);
747         }
748 #endif
749
750         entry = obstack_alloc(&obst, sizeof(*entry));
751         memset(entry, 0, sizeof(entry[0]));
752
753         entry->value                   = value;
754         entry->reload_point            = reload_point;
755         entry->unused_livethrough_loop = unused_livethrough_loop;
756         list_add_tail(&entry->head, &worklist->live_values);
757         ++worklist->n_live_values;
758         assert(worklist->n_live_values <= n_regs);
759 }
760
/**
 * Add the unused livethrough @p value to the worklists at all exit and entry
 * borders of the loop described by @p loop_info and bump the loop's pressure.
 */
static void push_unused_livethrough(loop_info_t *loop_info, ir_node *value)
{
	loop_edge_t *edge;
	++worklist_visited;

	/* add the value to all loop exit and entry blocks */
	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
		ir_node            *block
			= get_Block_cfgpred_block(edge->block, edge->pos);
		const block_info_t *info     = get_block_info(block);
		worklist_t         *worklist = info->end_worklist;
		ir_node            *reload_point = NULL;

		/* each worklist only once per push (several edges may share one) */
		if (worklist->visited >= worklist_visited)
			continue;
		worklist->visited = worklist_visited;

		/* TODO: we need a smarter mechanism here, that makes the reloader place
		 * reload nodes on all loop exits... */

		worklist_append(worklist, value, reload_point, loop_info->loop);
	}
	edge = loop_info->entry_edges;
	for ( ; edge != NULL; edge = edge->next) {
		ir_node            *entry_block = edge->block;
		const block_info_t *info        = get_block_info(entry_block);
		worklist_t         *worklist    = info->start_worklist;
		ir_node            *pred_block;
		ir_node            *reload_point;

		if (worklist->visited >= worklist_visited)
			continue;
		worklist->visited = worklist_visited;

		/* reloads for entries happen at the end of the predecessor block */
		pred_block   = get_Block_cfgpred_block(entry_block, edge->pos);
		reload_point = be_get_end_of_block_insertion_point(pred_block);

		worklist_append(worklist, value, reload_point, loop_info->loop);
	}

	set_irn_link(value, NULL);
	++loop_info->max_register_pressure;
}
804
/**
 * Find values that are live through the loop of @p loop_info without being
 * used inside it and keep them in registers across the loop, as long as the
 * loop's register pressure permits.
 */
static void push_unused_livethroughs(loop_info_t *loop_info)
{
	loop_edge_t *edge;

	/* we can only push unused livethroughs if register pressure inside the loop
	 * was low enough */
	if (loop_info->max_register_pressure >= n_regs)
		return;

	/* find unused livethroughs: register pressure in the loop was low enough
	 * which means that we had no spills which implies that at every point in
	 * the loop all*/
	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
		ir_node            *block          = edge->block;
		const block_info_t *info           = get_block_info(block);
		worklist_t         *start_worklist = info->start_worklist;
		ir_node            *exit_block;
		const block_info_t *exit_info;
		worklist_t         *end_worklist;
		struct list_head   *entry;

		if (start_worklist == NULL)
			continue;

		exit_block   = get_Block_cfgpred_block(edge->block, edge->pos);
		exit_info    = get_block_info(exit_block);
		end_worklist = exit_info->end_worklist;

		activate_worklist(end_worklist);
		/* all values contained in the start_worklist, which are not available
		 * in the end_worklist, must be unused livethroughs */

		list_for_each(entry, &start_worklist->live_values) {
			worklist_entry_t *wl_entry
				= list_entry(entry, worklist_entry_t, head);
			ir_node *value = wl_entry->value;

			/* stop once the loop has no free register left */
			if (loop_info->max_register_pressure >= n_regs)
				break;


			if (!worklist_contains(value)) {
				/* add it to all loop exits */
				DB((dbg, LEVEL_2, "Found unused livethrough: %+F (regpressure in loop %d)\n", value, loop_info->max_register_pressure));
				push_unused_livethrough(loop_info, value);
				/* the value should now be part of end_worklist, so mark it */
				mark_irn_visited(value);
			}
		}

		deactivate_worklist(end_worklist);
	}
}
858
859 static void process_block_or_loop(const block_or_loop_t block_or_loop)
860 {
861         if (block_or_loop.loop->kind == k_ir_loop) {
862                 loop_info_t *loop_info = get_loop_info(block_or_loop.loop);
863
864                 if (do_push_unused_livethroughs)
865                         push_unused_livethroughs(loop_info);
866
867                 if (loop_info->max_register_pressure > max_register_pressure)
868                         max_register_pressure = loop_info->max_register_pressure;
869
870                 return;
871         }
872         process_block(block_or_loop.block, NULL);
873 }
874
/**
 * Process one loop of the loop tree: recurse into all sub-loops first, then
 * collect this loop's blocks/sub-loops in postorder and run the belady
 * passes over them — two tentative passes (which should reach a fixpoint),
 * an optional debug pass that checks the fixpoint, and a final real pass
 * that records the spills/reloads.
 *
 * Communicates with process_block_or_loop() via the file-global flags
 * tentative_mode and do_push_unused_livethroughs; the maximal register
 * pressure observed is stored in the loop's loop_info.
 */
static void process_loop(ir_loop *loop)
{
	int         n_elements = get_loop_n_elements(loop);
	int         i, len;
	loop_info_t *loop_info;
	loop_edge_t *edge;
	ir_node     *some_block;

	/* first handle all sub-loops */
	for (i = 0; i < n_elements; ++i) {
		loop_element element = get_loop_element(loop, i);
		if (*element.kind != k_ir_loop)
			continue;

		process_loop(element.son);
	}

	/* create a postorder of the blocks */
	loop_info = get_loop_info(loop);
	edge      = loop_info->entry_edges;
	if (edge != NULL) {
		/* start block discovery from any block that has an entry edge */
		some_block = edge->block;
	} else {
		/* no entry edges: this must be the outermost (pseudo) loop of the
		 * graph, so start from the graph's start block */
		assert(loop == get_irg_loop(current_ir_graph));
		some_block = get_irg_start_block(current_ir_graph);
	}

	loop_blocks  = NEW_ARR_F(block_or_loop_t, 0);
	current_loop = loop;

	ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);
	inc_irg_block_visited(current_ir_graph);
	find_blocks(some_block);
	/* for endless loops the end-block might be unreachable */
	if (loop == get_irg_loop(current_ir_graph)) {
		find_blocks(get_irg_end_block(current_ir_graph));
	}
	ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);

	/* dump the collected block list (debug only) */
	DB((dbg, LEVEL_3, "Block List for loop %p\n", loop));
	len = ARR_LEN(loop_blocks);
	for (i = 0; i < len; ++i) {
		block_or_loop_t block_or_loop = loop_blocks[i];
		if (block_or_loop.loop->kind == k_ir_loop) {
			DB((dbg, LEVEL_3, " L-%p", block_or_loop.loop));
		} else {
			DB((dbg, LEVEL_3, " B-%+F", block_or_loop.block));
		}
	}
	DB((dbg, LEVEL_3, "\n"));

	max_register_pressure = 0;

	/* all passes iterate the postorder backwards (i from len-1 down to 0) */

	/* run1: tentative phase */
	tentative_mode = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(loop_blocks[i]);
	}

	/* run2: tentative phase - should reach fixpoint */
	tentative_mode = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(loop_blocks[i]);
	}

#ifndef NDEBUG
	/* run3: tentative phase - check fixpoint */
	tentative_mode               = true;
	should_have_reached_fixpoint = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(loop_blocks[i]);
	}
	should_have_reached_fixpoint = false;
#endif

	/* run4: add spills/reloads */
	tentative_mode              = false;
	do_push_unused_livethroughs = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(loop_blocks[i]);
	}
	do_push_unused_livethroughs = false;

	/* publish this loop's pressure so enclosing loops can read it */
	loop_info->max_register_pressure = max_register_pressure;
	DB((dbg, LEVEL_2, "Regpressure in loop %p: %u\n", loop,
				(unsigned) max_register_pressure));

	DEL_ARR_F(loop_blocks);
}
964
/**
 * Block walker: for every control-flow predecessor of @p block, insert
 * reloads for the values that are in the block's start worklist but are not
 * available at the end of that predecessor.
 *
 * @param block  the block whose incoming edges are fixed
 * @param data   unused walker payload
 */
static void fix_block_borders(ir_node *block, void *data)
{
	block_info_t *block_info     = get_block_info(block);
	worklist_t   *start_worklist = block_info->start_worklist;
	int           n_cfgpreds     = get_Block_n_cfgpreds(block);
	ir_loop      *loop           = get_irn_loop(block);
	int           i;

	(void) data;
	assert(start_worklist != NULL);

	for (i = 0; i < n_cfgpreds; ++i) {
		ir_node      *pred_block      = get_Block_cfgpred_block(block, i);
		block_info_t *pred_block_info = get_block_info(pred_block);
		worklist_t   *end_worklist    = pred_block_info->end_worklist;
		ir_loop      *pred_loop       = get_irn_loop(pred_block);
		bool          is_loop_entry   = false;
		worklist_entry_t *entry;

		assert(end_worklist != NULL);

		/* an edge from a less deeply nested loop enters the current loop */
		if (get_loop_depth(pred_loop) < get_loop_depth(loop)) {
			is_loop_entry = true;
		}

		/* reload missing values */
		activate_worklist(end_worklist);

		foreach_worklist(entry, start_worklist) {
			ir_node *value = entry->value;

			/* skip unused livethroughs except on loop-entry edges */
			if (!is_loop_entry && entry->unused_livethrough_loop != NULL)
				continue;

			if (is_Phi(value) && get_nodes_block(value) == block) {
				/* for a phi of this block the value flowing over edge i is
				 * the i-th phi argument */
				value = get_irn_n(value, i);

				/* we might have unknowns as argument for the phi */
				if (!arch_irn_consider_in_reg_alloc(cls, value))
					continue;
			}
			/* value is live at the end of the predecessor: nothing to do */
			if (worklist_contains(value))
				continue;
			be_add_reload_on_edge(senv, value, block, i, cls, 1);
		}

		deactivate_worklist(end_worklist);
	}
}
1014
1015 /**
1016  * Run the spill algorithm.
1017  *
1018  * @param birg  the graph to run on
1019  * @param ncls  the register class to run on
1020  */
1021 static void be_spill_belady3(be_irg_t *birg, const arch_register_class_t *ncls)
1022 {
1023         ir_graph *irg = be_get_birg_irg(birg);
1024
1025         cls    = ncls;
1026         n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
1027
1028         /* shortcut for register classes with ignore regs only */
1029         if (n_regs == 0)
1030                 return;
1031
1032         worklist_visited = 0;
1033         exec_freq        = be_get_birg_exec_freq(birg);
1034
1035         be_clear_links(irg);
1036         ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
1037                         | IR_RESOURCE_LOOP_LINK);
1038         inc_irg_visited(irg);
1039
1040         obstack_init(&obst);
1041         senv = be_new_spill_env(birg);
1042
1043         assure_cf_loop(irg);
1044         clear_loop_info(get_irg_loop(irg));
1045         irg_block_walk_graph(irg, construct_loop_edges, NULL, NULL);
1046
1047         process_loop(get_irg_loop(irg));
1048
1049         irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);
1050
1051         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
1052                         | IR_RESOURCE_LOOP_LINK);
1053
1054         be_insert_spills_reloads(senv);
1055
1056         obstack_free(&obst, NULL);
1057
1058         /* clean up */
1059         be_delete_spill_env(senv);
1060 }
1061
1062 void be_init_spillbelady3(void)
1063 {
1064         static be_spiller_t belady3_spiller = {
1065                 be_spill_belady3
1066         };
1067
1068         be_register_spiller("belady3", &belady3_spiller);
1069         FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady3");
1070 }
1071
/* register be_init_spillbelady3() as a backend module constructor so it runs
 * during backend module initialization */
BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady3);