/*
 * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief       MIN-algorithm with some twists (aka belady spiller v3)
 * @author      Matthias Braun
 * @date        15.01.2008
 * @version     $Id$
 *
 * TODO:
 *   - handle phis correctly, decide whether we should spill them ~ok
 *   - merge multiple start worksets of blocks ~ok
 *   - how and when to do the tentative phase...
 */
#include "config.h"

#include <stdbool.h>

#include "debug.h"
#include "list.h"
#include "pdeq.h"

#include "irnode_t.h"
#include "irprintf.h"
#include "iredges_t.h"
#include "irloop_t.h"
#include "irgwalk.h"
#include "execfreq.h"

#include "bemodule.h"
#include "bespill.h"
#include "beutil.h"
#include "bespillutil.h"
#include "besched.h"
#include "be_t.h"

#ifndef NDEBUG
#define EXPENSIVE_CHECKS
#endif

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

typedef struct loop_edge_t loop_edge_t;
struct loop_edge_t {
	ir_node     *block;
	int          pos;
	loop_edge_t *next;
};

typedef struct loop_info_t loop_info_t;
struct loop_info_t {
	ir_loop     *loop;
	loop_edge_t *entry_edges;
	loop_edge_t *exit_edges;
	size_t       max_register_pressure;
};

typedef struct worklist_entry_t worklist_entry_t;
struct worklist_entry_t {
	struct list_head  head;
	ir_node          *value;
	ir_node          *reload_point;
	ir_loop          *unused_livethrough_loop;
};

#define foreach_worklist(entry, wl) list_for_each_entry(worklist_entry_t, entry, &(wl)->live_values, head)

typedef struct worklist_t worklist_t;
struct worklist_t {
	struct list_head  live_values;
	size_t            n_live_values;
	ir_visited_t      visited;
};

typedef struct block_info_t block_info_t;
struct block_info_t {
	worklist_t *start_worklist;
	worklist_t *end_worklist;
};

/** The register class we currently run on. */
static const arch_register_class_t *cls;
static struct obstack               obst;
static spill_env_t                 *senv;
static size_t                       n_regs;
static size_t                       max_register_pressure;
static bool                         tentative_mode;
static bool                         do_push_unused_livethroughs;
/** Execution frequency for the current graph. */
static ir_exec_freq                *exec_freq;
static ir_visited_t                 worklist_visited;
#ifdef DEBUG_libfirm
static bool                         should_have_reached_fixpoint;
#endif

static worklist_t *new_worklist(void)
{
	worklist_t *worklist = OALLOCZ(&obst, worklist_t);

	INIT_LIST_HEAD(&worklist->live_values);
	worklist->n_live_values    = 0;

	return worklist;
}

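/**
 * Values that are part of the currently activated worklist carry the visited
 * mark; this clears the mark again for @p node.
 */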
static void mark_irn_not_visited(ir_node *node)
{
	set_irn_visited(node, get_irg_visited(current_ir_graph) - 1);
}

static bool worklist_contains(const ir_node *node)
{
	return irn_visited(node);
}

static block_info_t *get_block_info(ir_node *block)
{
	block_info_t *info = get_irn_link(block);
	if (info != NULL)
		return info;

	info = OALLOCZ(&obst, block_info_t);
	set_irn_link(block, info);
	return info;
}

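/**
 * Deactivates @p worklist: clears the visited mark and link of each value.
 */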
static void deactivate_worklist(const worklist_t *worklist)
{
	worklist_entry_t *entry;

	foreach_worklist(entry, worklist) {
		assert(worklist_contains(entry->value));
		mark_irn_not_visited(entry->value);
		set_irn_link(entry->value, NULL);
	}
}

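/**
 * Activates @p worklist: marks each value as visited and lets its link point
 * to its worklist entry.
 */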
static void activate_worklist(const worklist_t *worklist)
{
	worklist_entry_t *entry;

	foreach_worklist(entry, worklist) {
		assert(!worklist_contains(entry->value));
		mark_irn_visited(entry->value);
		set_irn_link(entry->value, entry);
	}
}

/**
 * Duplicates a worklist into @p new_worklist and directly activates it.
 */
static void fill_and_activate_worklist(worklist_t *new_worklist,
		const worklist_t *worklist, ir_node *block, ir_node *succ_block,
		int succ_pos)
{
	ir_node          *reload_point  = NULL;
	size_t            n_live_values = 0;
	worklist_entry_t *entry;

	if (succ_block != NULL &&
			(get_Block_n_cfgpreds(succ_block) > 1
			 || get_irn_n_edges_kind(block, EDGE_KIND_BLOCK) > 1)) {
		reload_point = be_get_end_of_block_insertion_point(block);
	}

	foreach_worklist(entry, worklist) {
		ir_node          *value = entry->value;
		worklist_entry_t *new_entry;

		if (new_worklist->n_live_values >= n_regs)
			break;

		if (is_Phi(value) && get_nodes_block(value) == succ_block) {
			value = get_Phi_pred(value, succ_pos);

			/* can happen for unknown phi preds */
			if (!arch_irn_consider_in_reg_alloc(cls, value))
				continue;
		}

		if (irn_visited_else_mark(value))
			continue;

		new_entry = OALLOCZ(&obst, worklist_entry_t);

		new_entry->value = value;
		if (reload_point != NULL) {
			new_entry->reload_point = reload_point;
		} else {
			new_entry->reload_point = entry->reload_point;
		}

		list_add_tail(&new_entry->head, &new_worklist->live_values);
		++n_live_values;

		set_irn_link(value, new_entry);
		new_worklist->n_live_values++;
	}
}

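/**
 * Creates a (non-activated) copy of @p worklist on the obstack.
 */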
static worklist_t *duplicate_worklist(const worklist_t *worklist)
{
	worklist_t       *new_worklist;
	struct list_head *entry;

	new_worklist = OALLOCZ(&obst, worklist_t);
	INIT_LIST_HEAD(&new_worklist->live_values);
	new_worklist->n_live_values = worklist->n_live_values;

	list_for_each(entry, &worklist->live_values) {
		worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
		worklist_entry_t *new_entry = OALLOC(&obst, worklist_entry_t);

		memcpy(new_entry, wl_entry, sizeof(new_entry[0]));
		list_add_tail(&new_entry->head, &new_worklist->live_values);
	}

	return new_worklist;
}

#ifdef DEBUG_libfirm
static void print_worklist(const worklist_t *worklist, int level)
{
	struct list_head *entry;

	DB((dbg, level, "%d values: ", worklist->n_live_values));
	list_for_each(entry, &worklist->live_values) {
		worklist_entry_t *wl_entry
			= list_entry(entry, worklist_entry_t, head);

		DB((dbg, level, "%+F ", wl_entry->value));
	}
}
#endif


/**
 * Clear the link fields of a loop and all
 * its child loops.
 *
 * @param loop  the root loop
 */
static void clear_loop_info(ir_loop *loop)
{
	int n_elements = get_loop_n_elements(loop);
	int i;

	loop->link = NULL;
	for (i = 0; i < n_elements; ++i) {
		loop_element element = get_loop_element(loop, i);
		if (*element.kind != k_ir_loop)
			continue;

		clear_loop_info(element.son);
	}
}

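/**
 * Get (and lazily construct) the loop_info of @p loop.
 */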
static loop_info_t *get_loop_info(ir_loop *loop)
{
	loop_info_t *info = loop->link;
	if (info != NULL)
		return info;

	info = OALLOCZ(&obst, loop_info_t);
	info->loop = loop;
	loop->link = info;
	return info;
}

/**
 * constructs loop in+out edges when called in a block walker
 */
static void construct_loop_edges(ir_node *block, void *data)
{
	int      n_cfgpreds = get_Block_n_cfgpreds(block);
	ir_loop *loop       = get_irn_loop(block);
	int      i;

	(void) data;

	for (i = 0; i < n_cfgpreds; ++i) {
		ir_node     *cfgpred_block = get_Block_cfgpred_block(block, i);
		ir_loop     *cfgpred_loop  = get_irn_loop(cfgpred_block);
		loop_edge_t *edge;
		bool        is_exit_edge;
		ir_loop     *l, *goal;

		if (cfgpred_loop == loop)
			continue;

		/* critical edges are split, so we cannot jump from one loop
		 * directly into another without going out of it */
		assert(get_loop_depth(cfgpred_loop) != get_loop_depth(loop));

		/* edge out of loop */
		if (get_loop_depth(cfgpred_loop) > get_loop_depth(loop)) {
			is_exit_edge = true;
			l            = cfgpred_loop;
			goal         = loop;
		} else {
			is_exit_edge = false;
			l            = loop;
			goal         = cfgpred_loop;
		}

		/* this might be a jump out of/into multiple loops */
		do {
			loop_info_t *l_info = get_loop_info(l);

			edge = OALLOCZ(&obst, loop_edge_t);
			edge->block = block;
			edge->pos   = i;

			if (is_exit_edge) {
				edge->next         = l_info->exit_edges;
				l_info->exit_edges = edge;
				assert(get_loop_depth(loop) < get_loop_depth(l));
			} else {
				edge->next          = l_info->entry_edges;
				l_info->entry_edges = edge;
				assert(get_loop_depth(cfgpred_loop) < get_loop_depth(l));
			}
			l = get_loop_outer_loop(l);
		} while (l != goal);
	}
}

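/**
 * Requests a reload of the entry's value before its recorded reload point
 * (does nothing in tentative mode).
 */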
static void place_reload(worklist_entry_t *entry)
{
	if (tentative_mode)
		return;

	DB((dbg, LEVEL_1, "reload of %+F before %+F\n", entry->value,
		entry->reload_point));
	assert(entry->reload_point != NULL);
	be_add_reload(senv, entry->value, entry->reload_point, cls, 1);
	entry->reload_point = NULL;
}

/**
 * makes sure the worklist contains no more than n_regs - room_needed entries
 */
static void make_room(worklist_t *worklist, size_t room_needed)
{
	int               i;
	int               spills_needed;
	struct list_head *entry;

	spills_needed = worklist->n_live_values + room_needed - n_regs;
	if (spills_needed <= 0)
		return;

	entry = worklist->live_values.next;
	for (i = spills_needed; i > 0; --i) {
		struct list_head *next = entry->next;
		worklist_entry_t *wl_entry
			= list_entry(entry, worklist_entry_t, head);

		assert(worklist_contains(wl_entry->value));
		mark_irn_not_visited(wl_entry->value);
		place_reload(wl_entry);
		list_del(entry);

		entry = next;
	}
	worklist->n_live_values -= spills_needed;
}

/**
 * a value was used, so bring it to the back of the worklist (which might
 * result in a spill of another value).
 */
static void val_used(worklist_t *worklist, ir_node *value, ir_node *sched_point)
{
	/* already in the worklist? Then just move it to the back, otherwise add
	 * it at the back */
	worklist_entry_t *entry = get_irn_link(value);

	assert(arch_irn_consider_in_reg_alloc(cls, value));

	if (worklist_contains(value)) {
		assert(entry != NULL);

		list_del(&entry->head);
	} else {
		if (entry == NULL) {
			entry        = OALLOCZ(&obst, worklist_entry_t);
			entry->value = value;
			set_irn_link(value, entry);
		}

		++worklist->n_live_values;
		mark_irn_visited(value);
	}

	entry->reload_point = sched_point;
	list_add_tail(&entry->head, &worklist->live_values);
}

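/**
 * Removes @p value from @p worklist and clears its visited mark.
 */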
static void worklist_remove(worklist_t *worklist, ir_node *value)
{
	worklist_entry_t *entry = get_irn_link(value);
	assert(entry != NULL);
	list_del(&entry->head);
	--worklist->n_live_values;

	assert(worklist_contains(value));
	mark_irn_not_visited(value);
}

static void update_max_pressure(worklist_t *worklist)
{
	if (worklist->n_live_values > max_register_pressure)
		max_register_pressure = worklist->n_live_values;
}

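/**
 * Walks the schedule of @p block backwards, keeps @p worklist up to date and
 * spills values whenever the register pressure would exceed n_regs.
 */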
static void do_spilling(ir_node *block, worklist_t *worklist)
{
	ir_node *node;

	assert(worklist != NULL);

	sched_foreach_reverse(block, node) {
		int    i, arity;
		size_t n_defs = 0;

		DB((dbg, LEVEL_2, "\t%+F... ", node));
		update_max_pressure(worklist);

		if (is_Phi(node)) {
			ir_node *node2;
			/* TODO: if we have some free registers, then we could decide to
			 * not spill some phis (but not for phis where at least one input
			 * is the phi itself) */

			/* we have to spill all phis that are not live */
			sched_foreach_reverse_from(node, node2) {
				assert(is_Phi(node2));

				if (worklist_contains(node2))
					continue;
				if (!arch_irn_consider_in_reg_alloc(cls, node2))
					continue;

				if (!tentative_mode)
					be_spill_phi(senv, node2);
			}
			DB((dbg, LEVEL_2, "\n"));
			break;
		}

		/* remove values defined by this instruction from the workset. Values
		 * defined but not in the workset need free registers */
		if (get_irn_mode(node) == mode_T) {
			const ir_edge_t *edge;

			foreach_out_edge(node, edge) {
				ir_node *proj = get_edge_src_irn(edge);
				if (!arch_irn_consider_in_reg_alloc(cls, proj))
					continue;
				if (worklist_contains(proj)) {
					worklist_remove(worklist, proj);
				} else {
					++n_defs;
				}
			}
		} else if (arch_irn_consider_in_reg_alloc(cls, node)) {
			if (worklist_contains(node)) {
				worklist_remove(worklist, node);
			} else {
				n_defs = 1;
			}
		}

		/* make sure we have enough free registers for the spills */
		make_room(worklist, n_defs);

		/* put all values used by the instruction into the workset */
		arity = get_irn_arity(node);
		for (i = 0; i < arity; ++i) {
			ir_node *use = get_irn_n(node, i);

			if (!arch_irn_consider_in_reg_alloc(cls, use))
				continue;

			val_used(worklist, use, node);
		}

		/* we might have too many values in the worklist now and need to spill
		 * some */
		make_room(worklist, 0);

#ifdef DEBUG_libfirm
		print_worklist(worklist, LEVEL_2);
		DB((dbg, LEVEL_2, "\n"));
#endif
	}

	update_max_pressure(worklist);

#ifdef DEBUG_libfirm
	DB((dbg, LEVEL_1, "worklist at begin of %+F:", block));
	print_worklist(worklist, LEVEL_1);
	DB((dbg, LEVEL_1, "\n"));
#endif
}

#ifdef DEBUG_libfirm
/** Returns true if both worklists contain the same values in the same order. */
static bool worklists_equal(const worklist_t *wl1, const worklist_t *wl2)
{
	const struct list_head *i1 = wl1->live_values.next;
	const struct list_head *i2 = wl2->live_values.next;

	for ( ; i1 != &wl1->live_values && i2 != &wl2->live_values;
			i1 = i1->next, i2 = i2->next) {
		worklist_entry_t *entry1 = list_entry(i1, worklist_entry_t, head);
		worklist_entry_t *entry2 = list_entry(i2, worklist_entry_t, head);

		if (entry1->value != entry2->value)
			return false;
	}
	/* both lists must have reached their end */
	if (i1 != &wl1->live_values || i2 != &wl2->live_values)
		return false;

	return true;
}
#endif

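/**
 * Merges the start worklist of the best (most frequently executed, not yet
 * visited) successor block of @p block into @p new_worklist.
 * Returns false if no such successor is left.
 */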
static bool fill_start_worklist(worklist_t *new_worklist, ir_node *block)
{
	double           best_execfreq   = -1;
	worklist_t      *best_worklist   = NULL;
	ir_node         *best_succ_block = NULL;
	int              best_pos        = -1;
	const ir_edge_t *edge;

	/* construct worklist */
	foreach_block_succ(block, edge) {
		ir_node      *succ_block = get_edge_src_irn(edge);
		double       execfreq    = get_block_execfreq(exec_freq, succ_block);
		block_info_t *block_info;
		worklist_t   *succ_worklist;

		if (execfreq < best_execfreq)
			continue;

		block_info    = get_block_info(succ_block);
		succ_worklist = block_info->start_worklist;

		if (succ_worklist == NULL || succ_worklist->visited >= worklist_visited)
			continue;

		best_execfreq   = execfreq;
		best_worklist   = succ_worklist;
		best_succ_block = succ_block;
		best_pos        = get_edge_src_pos(edge);
	}

	if (best_worklist == NULL)
		return false;
	best_worklist->visited = worklist_visited;

	fill_and_activate_worklist(new_worklist, best_worklist, block,
			best_succ_block, best_pos);
	return true;
}

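/**
 * Constructs the worklist of values live at the end of @p block by merging
 * the start worklists of its successors, most frequently executed first,
 * until n_regs values are live or all successors have been considered.
 */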
static worklist_t *construct_start_worklist(ir_node *block)
{
	worklist_t *worklist = new_worklist();

	++worklist_visited;

	while (fill_start_worklist(worklist, block)) {
		if (worklist->n_live_values >= n_regs)
			break;
	}

	return worklist;
}

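/**
 * Block walker: performs the spilling decisions for @p block.
 * Builds the end-of-block worklist, walks the schedule backwards and stores
 * the resulting start and end worklists in the block info.
 */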
static void process_block(ir_node *block, void *env)
{
	block_info_t    *block_info;
	worklist_t      *worklist;
	int              n_preds;

	(void) env;
	DB((dbg, LEVEL_1, "Processing %+F\n", block));

	worklist   = construct_start_worklist(block);
	block_info = get_block_info(block);

#ifdef DEBUG_libfirm
	DB((dbg, LEVEL_1, "worklist at end of %+F:", block));
	print_worklist(worklist, LEVEL_1);
	DB((dbg, LEVEL_1, "\n"));

	if (should_have_reached_fixpoint) {
		assert(worklists_equal(block_info->end_worklist, worklist));
	}
#endif
	block_info->end_worklist = duplicate_worklist(worklist);

	do_spilling(block, worklist);
	deactivate_worklist(worklist);

#ifdef DEBUG_libfirm
	if (should_have_reached_fixpoint) {
		assert(worklists_equal(block_info->start_worklist, worklist));
	}
#endif
	block_info->start_worklist = worklist;

	/* we shouldn't have any live values left at the start block */
	n_preds = get_Block_n_cfgpreds(block);
	//assert(n_preds != 0 || worklist->n_live_values == 0);
}

typedef union block_or_loop_t block_or_loop_t;
union block_or_loop_t {
	ir_node *block;
	ir_loop *loop;
};

static block_or_loop_t *loop_blocks;
static ir_loop         *current_loop;

static void find_blocks(ir_node *block);

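/**
 * Adds the inner loop @p loop (entered through block @p entry) as a single
 * element to the block list and continues the search at its exit edges.
 */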
static void find_in_loop(ir_loop *loop, ir_node *entry)
{
	loop_info_t     *loop_info = get_loop_info(loop);
	block_or_loop_t block_or_loop;
	loop_edge_t     *edge;

	/* simply mark one block in the loop to indicate that the loop was already
	 * processed */
	ir_node *some_block = loop_info->entry_edges->block;
	if (Block_block_visited(some_block))
		return;

	block_or_loop.loop = loop;
	ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);

#ifndef NDEBUG
	{
		/* we should enter through one of the loop's entry blocks */
		loop_edge_t *edge  = loop_info->entry_edges;
		bool         found = false;
		for ( ; edge != NULL; edge = edge->next) {
			if (edge->block == entry) {
				found = true;
				break;
			}
		}
		assert(found);
	}
#else
	(void) entry;
#endif

	/* check all loop successors */
	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
		ir_node *succ      = edge->block;
		ir_loop *succ_loop = get_irn_loop(succ);

		if (succ_loop == current_loop) {
			find_blocks(succ);
		} else {
			assert(get_loop_depth(succ_loop) < get_loop_depth(current_loop));
		}
	}
}

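/**
 * Collects all blocks of the current loop into the loop_blocks array;
 * nested loops are added as single loop entries.
 */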
static void find_blocks(ir_node *block)
{
	const ir_edge_t *edge;
	block_or_loop_t block_or_loop;

	if (Block_block_visited(block))
		return;

	block_or_loop.block = block;

	ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);
	mark_Block_block_visited(block);

	foreach_block_succ(block, edge) {
		ir_node *succ = get_edge_src_irn(edge);

		/* is it part of the current loop? */
		ir_loop *loop = get_irn_loop(succ);
		if (loop != current_loop) {
			/* a sub-loop? */
			if (get_loop_depth(loop) > get_loop_depth(current_loop)) {
				find_in_loop(loop, succ);
			} else {
				/* parent loop: we're not interested in the block */
			}
		} else {
			find_blocks(succ);
		}
	}
}

/**
 * append an entry to a worklist. WARNING: The entry must not already be in the
 * worklist.
 */
static void worklist_append(worklist_t *worklist, ir_node *value,
                            ir_node *reload_point,
                            ir_loop *unused_livethrough_loop)
{
	worklist_entry_t *entry;

#ifdef EXPENSIVE_CHECKS
	foreach_worklist(entry, worklist) {
		assert(entry->value != value);
	}
#endif

	entry = OALLOCZ(&obst, worklist_entry_t);
	entry->value                   = value;
	entry->reload_point            = reload_point;
	entry->unused_livethrough_loop = unused_livethrough_loop;
	list_add_tail(&entry->head, &worklist->live_values);
	++worklist->n_live_values;
	assert(worklist->n_live_values <= n_regs);
}

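/**
 * Appends the unused livethrough @p value to the start worklists of the
 * loop's entry blocks and to the end worklists of the blocks in front of its
 * exit edges, so the value stays in a register throughout the loop.
 */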
static void push_unused_livethrough(loop_info_t *loop_info, ir_node *value)
{
	loop_edge_t *edge;
	++worklist_visited;

	/* add the value to all loop exit and entry blocks */
	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
		ir_node            *block
			= get_Block_cfgpred_block(edge->block, edge->pos);
		const block_info_t *info     = get_block_info(block);
		worklist_t         *worklist = info->end_worklist;
		ir_node            *reload_point = NULL;

		if (worklist->visited >= worklist_visited)
			continue;
		worklist->visited = worklist_visited;

		/* TODO: we need a smarter mechanism here, that makes the reloader place
		 * reload nodes on all loop exits... */

		worklist_append(worklist, value, reload_point, loop_info->loop);
	}
	edge = loop_info->entry_edges;
	for ( ; edge != NULL; edge = edge->next) {
		ir_node            *entry_block = edge->block;
		const block_info_t *info        = get_block_info(entry_block);
		worklist_t         *worklist    = info->start_worklist;
		ir_node            *pred_block;
		ir_node            *reload_point;

		if (worklist->visited >= worklist_visited)
			continue;
		worklist->visited = worklist_visited;

		pred_block   = get_Block_cfgpred_block(entry_block, edge->pos);
		reload_point = be_get_end_of_block_insertion_point(pred_block);

		worklist_append(worklist, value, reload_point, loop_info->loop);
	}

	set_irn_link(value, NULL);
	++loop_info->max_register_pressure;
}

static void push_unused_livethroughs(loop_info_t *loop_info)
{
	loop_edge_t *edge;

	/* we can only push unused livethroughs if register pressure inside the loop
	 * was low enough */
	if (loop_info->max_register_pressure >= n_regs)
		return;

	/* find unused livethroughs: register pressure in the loop was low enough,
	 * which means that we had no spills, which implies that at every point in
	 * the loop all live values fit into the available registers */
	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
		ir_node            *block          = edge->block;
		const block_info_t *info           = get_block_info(block);
		worklist_t         *start_worklist = info->start_worklist;
		ir_node            *exit_block;
		const block_info_t *exit_info;
		worklist_t         *end_worklist;
		struct list_head   *entry;

		if (start_worklist == NULL)
			continue;

		exit_block   = get_Block_cfgpred_block(edge->block, edge->pos);
		exit_info    = get_block_info(exit_block);
		end_worklist = exit_info->end_worklist;

		activate_worklist(end_worklist);
		/* all values contained in the start_worklist which are not available
		 * in the end_worklist must be unused livethroughs */

		list_for_each(entry, &start_worklist->live_values) {
			worklist_entry_t *wl_entry
				= list_entry(entry, worklist_entry_t, head);
			ir_node *value = wl_entry->value;

			if (loop_info->max_register_pressure >= n_regs)
				break;

			if (!worklist_contains(value)) {
				/* add it to all loop exits */
				DB((dbg, LEVEL_2, "Found unused livethrough: %+F (regpressure in loop %d)\n", value, loop_info->max_register_pressure));
				push_unused_livethrough(loop_info, value);
				/* the value should now be part of end_worklist, so mark it */
				mark_irn_visited(value);
			}
		}

		deactivate_worklist(end_worklist);
	}
}

static void process_block_or_loop(const block_or_loop_t block_or_loop)
{
	if (block_or_loop.loop->kind == k_ir_loop) {
		loop_info_t *loop_info = get_loop_info(block_or_loop.loop);

		if (do_push_unused_livethroughs)
			push_unused_livethroughs(loop_info);

		if (loop_info->max_register_pressure > max_register_pressure)
			max_register_pressure = loop_info->max_register_pressure;

		return;
	}
	process_block(block_or_loop.block, NULL);
}

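/**
 * Processes @p loop: handles all sub-loops first, collects the loop's blocks,
 * then runs the tentative passes followed by the final spill/reload pass over
 * them.
 */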
static void process_loop(ir_loop *loop)
{
	int         n_elements = get_loop_n_elements(loop);
	int         i, len;
	loop_info_t *loop_info;
	loop_edge_t *edge;
	ir_node     *some_block;

	/* first handle all sub-loops */
	for (i = 0; i < n_elements; ++i) {
		loop_element element = get_loop_element(loop, i);
		if (*element.kind != k_ir_loop)
			continue;

		process_loop(element.son);
	}

	/* create a postorder of the blocks */
	loop_info = get_loop_info(loop);
	edge      = loop_info->entry_edges;
	if (edge != NULL) {
		some_block = edge->block;
	} else {
		assert(loop == get_irg_loop(current_ir_graph));
		some_block = get_irg_start_block(current_ir_graph);
	}

	loop_blocks  = NEW_ARR_F(block_or_loop_t, 0);
	current_loop = loop;

	ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);
	inc_irg_block_visited(current_ir_graph);
	find_blocks(some_block);
	/* for endless loops the end-block might be unreachable */
	if (loop == get_irg_loop(current_ir_graph)) {
		find_blocks(get_irg_end_block(current_ir_graph));
	}
	ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);

	DB((dbg, LEVEL_3, "Block List for loop %p\n", loop));
	len = ARR_LEN(loop_blocks);
	for (i = 0; i < len; ++i) {
		block_or_loop_t block_or_loop = loop_blocks[i];
		if (block_or_loop.loop->kind == k_ir_loop) {
			DB((dbg, LEVEL_3, " L-%p", block_or_loop.loop));
		} else {
			DB((dbg, LEVEL_3, " B-%+F", block_or_loop.block));
		}
	}
	DB((dbg, LEVEL_3, "\n"));

	max_register_pressure = 0;

	/* run1: tentative phase */
	tentative_mode = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(loop_blocks[i]);
	}

	/* run2: tentative phase - should reach fixpoint */
	tentative_mode = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(loop_blocks[i]);
	}

#ifdef DEBUG_libfirm
	/* run3: tentative phase - check fixpoint */
	tentative_mode               = true;
	should_have_reached_fixpoint = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(loop_blocks[i]);
	}
	should_have_reached_fixpoint = false;
#endif

	/* run4: add spills/reloads */
	tentative_mode              = false;
	do_push_unused_livethroughs = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(loop_blocks[i]);
	}
	do_push_unused_livethroughs = false;

	loop_info->max_register_pressure = max_register_pressure;
	DB((dbg, LEVEL_2, "Regpressure in loop %p: %u\n", loop,
				(unsigned) max_register_pressure));

	DEL_ARR_F(loop_blocks);
}

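/**
 * Block walker: inserts reloads on the incoming edges of @p block for values
 * that are live at its start but not in the end worklist of the predecessor.
 */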
static void fix_block_borders(ir_node *block, void *data)
{
	block_info_t *block_info     = get_block_info(block);
	worklist_t   *start_worklist = block_info->start_worklist;
	int           n_cfgpreds     = get_Block_n_cfgpreds(block);
	ir_loop      *loop           = get_irn_loop(block);
	int           i;

	(void) data;
	assert(start_worklist != NULL);

	for (i = 0; i < n_cfgpreds; ++i) {
		ir_node      *pred_block      = get_Block_cfgpred_block(block, i);
		block_info_t *pred_block_info = get_block_info(pred_block);
		worklist_t   *end_worklist    = pred_block_info->end_worklist;
		ir_loop      *pred_loop       = get_irn_loop(pred_block);
		bool          is_loop_entry   = false;
		worklist_entry_t *entry;

		assert(end_worklist != NULL);

		if (get_loop_depth(pred_loop) < get_loop_depth(loop)) {
			is_loop_entry = true;
		}

		/* reload missing values */
		activate_worklist(end_worklist);

		foreach_worklist(entry, start_worklist) {
			ir_node *value = entry->value;

			if (!is_loop_entry && entry->unused_livethrough_loop != NULL)
				continue;

			if (is_Phi(value) && get_nodes_block(value) == block) {
				value = get_irn_n(value, i);

				/* we might have unknowns as arguments for the phi */
				if (!arch_irn_consider_in_reg_alloc(cls, value))
					continue;
			}
			if (worklist_contains(value))
				continue;
			be_add_reload_on_edge(senv, value, block, i, cls, 1);
		}

		deactivate_worklist(end_worklist);
	}
}

/**
 * Run the spill algorithm.
 *
 * @param birg  the graph to run on
 * @param ncls  the register class to run on
 */
static void be_spill_belady3(be_irg_t *birg, const arch_register_class_t *ncls)
{
	ir_graph *irg = be_get_birg_irg(birg);

	cls    = ncls;
	n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);

	/* shortcut for register classes with ignore regs only */
	if (n_regs == 0)
		return;

	worklist_visited = 0;
	exec_freq        = be_get_birg_exec_freq(birg);

	be_clear_links(irg);
	ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
			| IR_RESOURCE_LOOP_LINK);
	inc_irg_visited(irg);

	obstack_init(&obst);
	senv = be_new_spill_env(birg);

	assure_cf_loop(irg);
	clear_loop_info(get_irg_loop(irg));
	irg_block_walk_graph(irg, construct_loop_edges, NULL, NULL);

	process_loop(get_irg_loop(irg));

	irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);

	ir_free_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
			| IR_RESOURCE_LOOP_LINK);

	be_insert_spills_reloads(senv);

	obstack_free(&obst, NULL);

	/* clean up */
	be_delete_spill_env(senv);
}

BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady3);
void be_init_spillbelady3(void)
{
	static be_spiller_t belady3_spiller = {
		be_spill_belady3
	};

	be_register_spiller("belady3", &belady3_spiller);
	FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady3");
}