Add arch_get_register_req_out().
[libfirm] / ir / be / bespillbelady3.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       MIN-algorithm with some twists (aka belady spiller v3)
23  * @author      Matthias Braun
24  * @date        15.01.2008
25  * @version     $Id$
26  *
27  * TODO:
28  *   - handle phis correctly, decide whether we should spill them ~ok
29  *   - merge multiple start worksets of blocks ~ok
30  *   - how to and when to do the tentative phase...
31  */
32 #include "config.h"
33
34 #include <stdbool.h>
35
36 #include "debug.h"
37 #include "list.h"
38 #include "pdeq.h"
39
40 #include "irnode_t.h"
41 #include "irprintf.h"
42 #include "iredges_t.h"
43 #include "irloop_t.h"
44 #include "irgwalk.h"
45 #include "execfreq.h"
46
47 #include "bemodule.h"
48 #include "bespill.h"
49 #include "beutil.h"
50 #include "bespilloptions.h"
51 #include "besched_t.h"
52 #include "be_t.h"
53
54 #ifndef NDEBUG
55 #define EXPENSIVE_CHECKS
56 #endif
57
58 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
59
typedef struct loop_edge_t loop_edge_t;
/** A control-flow edge crossing a loop boundary (singly linked list node). */
struct loop_edge_t {
        ir_node     *block; /**< block whose cfg predecessor at @c pos crosses the boundary */
        int          pos;   /**< index of the crossing cfg predecessor in @c block */
        loop_edge_t *next;  /**< next edge in the list */
};

typedef struct loop_info_t loop_info_t;
/** Per-loop data of this pass (stored in the loop's link field, see get_loop_info()). */
struct loop_info_t {
        ir_loop     *loop;                  /**< the loop this info belongs to */
        loop_edge_t *entry_edges;           /**< edges entering the loop */
        loop_edge_t *exit_edges;            /**< edges leaving the loop */
        size_t       max_register_pressure; /**< maximum register pressure seen inside the loop */
};
74
typedef struct worklist_entry_t worklist_entry_t;
/** One live value tracked in a worklist. */
struct worklist_entry_t {
        struct list_head  head;         /**< list handle linking entries of a worklist */
        ir_node          *value;        /**< the live value itself */
        ir_node          *reload_point; /**< node before which a reload is placed if the value is spilled */
        ir_loop          *unused_livethrough_loop; /**< loop this value is an unused livethrough of, or NULL */
};

/** Iterate over all entries of worklist @p wl with cursor @p entry. */
#define foreach_worklist(entry, wl) list_for_each_entry(worklist_entry_t, entry, &(wl)->live_values, head)

typedef struct worklist_t worklist_t;
/** Ordered list of live values; make_room() spills from the front, val_used() appends at the back. */
struct worklist_t {
        struct list_head  live_values;   /**< list of worklist_entry_t */
        size_t            n_live_values; /**< number of entries in live_values */
        ir_visited_t      visited;       /**< generation marker, compared against worklist_visited */
};
91
typedef struct block_info_t block_info_t;
/** Per-block data of this pass (stored in the block's link field, see get_block_info()). */
struct block_info_t {
        worklist_t *start_worklist; /**< values live at the start of the block */
        worklist_t *end_worklist;   /**< values live at the end of the block */
};

/** The register class we currently run on. */
static const arch_register_class_t *cls;
/** Obstack holding all worklists, entries, loop edges and infos of this pass. */
static struct obstack               obst;
/** Spill environment that receives the spill/reload requests. */
static spill_env_t                 *senv;
/** Worklist size limit; presumably the number of registers in cls — set outside this chunk. */
static size_t                       n_regs;
/** Highest register pressure seen so far. */
static size_t                       max_register_pressure;
/** When set, pressure is only estimated: no spills/reloads are really emitted. */
static bool                         tentative_mode;
/** Debugging flag: when set, worklists are asserted to be stable (see process_block()). */
static bool                         should_have_reached_fixpoint;
/** When set, unused livethrough values are pushed out of loops (see process_block_or_loop()). */
static bool                         do_push_unused_livethroughs;
/** Execution frequency for the current graph. */
static ir_exec_freq                *exec_freq;
/** Generation counter used to mark worklists as visited. */
static ir_visited_t                 worklist_visited;
110
111 static worklist_t *new_worklist(void)
112 {
113         worklist_t *worklist = obstack_alloc(&obst, sizeof(worklist[0]));
114         memset(worklist, 0, sizeof(worklist[0]));
115
116         INIT_LIST_HEAD(&worklist->live_values);
117         worklist->n_live_values    = 0;
118
119         return worklist;
120 }
121
122 static void mark_irn_not_visited(ir_node *node)
123 {
124         set_irn_visited(node, get_irg_visited(current_ir_graph) - 1);
125 }
126
127 static bool worklist_contains(const ir_node *node)
128 {
129         return irn_visited(node);
130 }
131
132 static block_info_t *get_block_info(ir_node *block)
133 {
134         block_info_t *info = get_irn_link(block);
135         if (info != NULL)
136                 return info;
137
138         info = obstack_alloc(&obst, sizeof(info[0]));
139         memset(info, 0, sizeof(info[0]));
140         set_irn_link(block, info);
141         return info;
142 }
143
144 static void deactivate_worklist(const worklist_t *worklist)
145 {
146         worklist_entry_t *entry;
147
148         foreach_worklist(entry, worklist) {
149                 assert(worklist_contains(entry->value));
150                 mark_irn_not_visited(entry->value);
151                 set_irn_link(entry->value, NULL);
152         }
153 }
154
155 static void activate_worklist(const worklist_t *worklist)
156 {
157         worklist_entry_t *entry;
158
159         foreach_worklist(entry, worklist) {
160                 assert(!worklist_contains(entry->value));
161                 mark_irn_visited(entry->value);
162                 set_irn_link(entry->value, entry);
163         }
164 }
165
166 /**
167  * duplicate a worklist and directly activates it
168  */
169 static void fill_and_activate_worklist(worklist_t *new_worklist,
170                 const worklist_t *worklist, ir_node *block, ir_node *succ_block,
171                 int succ_pos)
172 {
173         ir_node          *reload_point  = NULL;
174         size_t            n_live_values = 0;
175         worklist_entry_t *entry;
176
177         if (succ_block != NULL &&
178                         (get_Block_n_cfgpreds(succ_block) > 1
179                          || get_irn_n_edges_kind(block, EDGE_KIND_BLOCK) > 1)) {
180                 reload_point = be_get_end_of_block_insertion_point(block);
181         }
182
183         foreach_worklist(entry, worklist) {
184                 ir_node          *value = entry->value;
185                 worklist_entry_t *new_entry;
186
187                 if (new_worklist->n_live_values >= n_regs)
188                         break;
189
190                 if (is_Phi(value) && get_nodes_block(value) == succ_block) {
191                         value = get_Phi_pred(value, succ_pos);
192
193                         /* can happen for unknown phi preds */
194                         if (!arch_irn_consider_in_reg_alloc(cls, value))
195                                 continue;
196                 }
197
198                 if (irn_visited_else_mark(value))
199                         continue;
200
201                 new_entry = obstack_alloc(&obst, sizeof(new_entry[0]));
202                 memset(new_entry, 0, sizeof(new_entry[0]));
203
204                 new_entry->value = value;
205                 if (reload_point != NULL) {
206                         new_entry->reload_point = reload_point;
207                 } else {
208                         new_entry->reload_point = entry->reload_point;
209                 }
210
211                 list_add_tail(&new_entry->head, &new_worklist->live_values);
212                 ++n_live_values;
213
214                 set_irn_link(value, new_entry);
215                 new_worklist->n_live_values++;
216         }
217 }
218
219 static worklist_t *duplicate_worklist(const worklist_t *worklist)
220 {
221         worklist_t       *new_worklist;
222         struct list_head *entry;
223
224         new_worklist = obstack_alloc(&obst, sizeof(new_worklist[0]));
225         memset(new_worklist, 0, sizeof(new_worklist[0]));
226         INIT_LIST_HEAD(&new_worklist->live_values);
227         new_worklist->n_live_values = worklist->n_live_values;
228
229         list_for_each(entry, &worklist->live_values) {
230                 worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
231                 worklist_entry_t *new_entry
232                         = obstack_alloc(&obst, sizeof(new_entry[0]));
233
234                 memcpy(new_entry, wl_entry, sizeof(new_entry[0]));
235                 list_add_tail(&new_entry->head, &new_worklist->live_values);
236         }
237
238         return new_worklist;
239 }
240
#ifdef DEBUG_libfirm
/**
 * Debug-print the size and values of @p worklist at debug level @p level.
 *
 * Fix: n_live_values is a size_t; passing it to "%d" through varargs is
 * undefined behavior — cast it explicitly.
 */
static void print_worklist(const worklist_t *worklist, int level)
{
        struct list_head *entry;

        DB((dbg, level, "%d values: ", (int)worklist->n_live_values));
        list_for_each(entry, &worklist->live_values) {
                worklist_entry_t *wl_entry
                        = list_entry(entry, worklist_entry_t, head);

                DB((dbg, level, "%+F ", wl_entry->value));
        }
}
#endif
255
256
257 /**
258  * Clear the link fields of a loop and all
259  * its child loops.
260  *
261  * @param loop  the root loop
262  */
263 static void clear_loop_info(ir_loop *loop)
264 {
265         int n_elements = get_loop_n_elements(loop);
266         int i;
267
268         loop->link = NULL;
269         for (i = 0; i < n_elements; ++i) {
270                 loop_element element = get_loop_element(loop, i);
271                 if (*element.kind != k_ir_loop)
272                         continue;
273
274                 clear_loop_info(element.son);
275         }
276 }
277
278 static loop_info_t *get_loop_info(ir_loop *loop)
279 {
280         loop_info_t *info = loop->link;
281         if (info != NULL)
282                 return info;
283
284         info = obstack_alloc(&obst, sizeof(info[0]));
285         memset(info, 0, sizeof(info[0]));
286         info->loop = loop;
287         loop->link = info;
288         return info;
289 }
290
/**
 * constructs loop in+out edges when called in a block walker
 *
 * For every cfg predecessor of @p block whose source lies in a different
 * loop, a loop_edge_t record is prepended to the entry_edges (edge into a
 * deeper loop) or exit_edges (edge out of a deeper loop) list of EVERY loop
 * on the nesting path between the two loops.
 */
static void construct_loop_edges(ir_node *block, void *data)
{
        int      n_cfgpreds = get_Block_n_cfgpreds(block);
        ir_loop *loop       = get_irn_loop(block);
        int      i;

        (void) data;

        for (i = 0; i < n_cfgpreds; ++i) {
                ir_node     *cfgpred_block = get_Block_cfgpred_block(block, i);
                ir_loop     *cfgpred_loop  = get_irn_loop(cfgpred_block);
                loop_edge_t *edge;
                bool        is_exit_edge;
                ir_loop     *l, *goal;

                /* edge inside one loop: nothing to record */
                if (cfgpred_loop == loop)
                        continue;

                /* critical edges are splitted, so we can't jump from 1 loop
                 * directly into another without going out of it */
                assert(get_loop_depth(cfgpred_loop) != get_loop_depth(loop));

                /* edge out of loop */
                if (get_loop_depth(cfgpred_loop) > get_loop_depth(loop)) {
                        is_exit_edge = true;
                        l            = cfgpred_loop;
                        goal         = loop;
                } else {
                        is_exit_edge = false;
                        l            = loop;
                        goal         = cfgpred_loop;
                }

                /* this might be a jump out/in multiple loops */
                do {
                        loop_info_t *l_info = get_loop_info(l);

                        /* one edge record per crossed loop */
                        edge = obstack_alloc(&obst, sizeof(edge[0]));
                        memset(edge, 0, sizeof(edge[0]));
                        edge->block = block;
                        edge->pos   = i;

                        if (is_exit_edge) {
                                edge->next         = l_info->exit_edges;
                                l_info->exit_edges = edge;
                                assert(get_loop_depth(loop) < get_loop_depth(l));
                        } else {
                                edge->next          = l_info->entry_edges;
                                l_info->entry_edges = edge;
                                assert(get_loop_depth(cfgpred_loop) < get_loop_depth(l));
                        }
                        l = get_loop_outer_loop(l);
                } while (l != goal);
        }
}
349
350
351
352
353 static void place_reload(worklist_entry_t *entry)
354 {
355         if (tentative_mode)
356                 return;
357
358         DB((dbg, LEVEL_1, "reload of %+F before %+F\n", entry->value,
359                 entry->reload_point));
360         assert(entry->reload_point != NULL);
361         be_add_reload(senv, entry->value, entry->reload_point, cls, 1);
362         entry->reload_point = NULL;
363 }
364
365 /**
366  * makes sure the worklist contains not more than n_regs - room_needed entries
367  */
368 static void make_room(worklist_t *worklist, size_t room_needed)
369 {
370         int               i;
371         int               spills_needed;
372         struct list_head *entry;
373
374         spills_needed = worklist->n_live_values + room_needed - n_regs;
375         if (spills_needed <= 0)
376                 return;
377
378         entry = worklist->live_values.next;
379         for(i = spills_needed; i > 0; --i) {
380                 struct list_head *next = entry->next;
381                 worklist_entry_t *wl_entry
382                         = list_entry(entry, worklist_entry_t, head);
383
384                 assert(worklist_contains(wl_entry->value));
385                 mark_irn_not_visited(wl_entry->value);
386                 place_reload(wl_entry);
387                 list_del(entry);
388
389                 entry = next;
390         }
391         worklist->n_live_values -= spills_needed;
392 }
393
/**
 * a value was used, so bring it to the back of the worklist (which might
 * result in a spill of another value).
 *
 * @param worklist     the active worklist
 * @param value        the value that was used
 * @param sched_point  node before which a reload has to be placed if the
 *                     value gets spilled later on
 */
static void val_used(worklist_t *worklist, ir_node *value, ir_node *sched_point)
{
        /* already in the worklist? move around, otherwise add at back */
        worklist_entry_t *entry = get_irn_link(value);

        assert(arch_irn_consider_in_reg_alloc(cls, value));

        if (worklist_contains(value)) {
                assert(entry != NULL);

                list_del(&entry->head);
        } else {
                /* entry can be non-NULL although the value is not active:
                 * make_room() clears only the visited mark, not the link, so
                 * the old entry is reused here */
                if (entry == NULL) {
                        entry        = obstack_alloc(&obst, sizeof(entry[0]));
                        memset(entry, 0, sizeof(entry[0]));

                        entry->value = value;
                        set_irn_link(value, entry);
                }

                ++worklist->n_live_values;
                mark_irn_visited(value);
        }

        /* (re)append at the back: most recently used */
        entry->reload_point = sched_point;
        list_add_tail(&entry->head, &worklist->live_values);
}
425
426 static void worklist_remove(worklist_t *worklist, ir_node *value)
427 {
428         worklist_entry_t *entry = get_irn_link(value);
429         assert(entry != NULL);
430         list_del(&entry->head);
431         --worklist->n_live_values;
432
433         assert(worklist_contains(value));
434         mark_irn_not_visited(value);
435 }
436
437 static void update_max_pressure(worklist_t *worklist)
438 {
439         if (worklist->n_live_values > max_register_pressure)
440                 max_register_pressure = worklist->n_live_values;
441 }
442
/**
 * Run the belady/MIN decision on one block: walk the schedule backwards;
 * @p worklist holds the values live at the end of the block on entry and
 * the values live at the start of the block on return.
 */
static void do_spilling(ir_node *block, worklist_t *worklist)
{
        ir_node *node;

        assert(worklist != NULL);

        sched_foreach_reverse(block, node) {
                int    i, arity;
                size_t n_defs = 0;

                DB((dbg, LEVEL_2, "\t%+F... ", node));
                update_max_pressure(worklist);

                if (is_Phi(node)) {
                        ir_node *node2;
                        /* TODO: if we have some free registers, then we could decide to
                         * not spill some phis (but not for phis where at least 1 input is
                         * themselfes) */

                        /* we have to spill all phis that are not live */
                        sched_foreach_reverse_from(node, node2) {
                                /* the assert documents the assumption that all
                                 * remaining scheduled nodes are phis */
                                assert(is_Phi(node2));

                                if (worklist_contains(node2))
                                        continue;
                                if (!arch_irn_consider_in_reg_alloc(cls, node2))
                                        continue;

                                if (!tentative_mode)
                                        be_spill_phi(senv, node2);
                        }
                        DB((dbg, LEVEL_2, "\n"));
                        break;
                }

                /* remove values defined by this instruction from the workset. Values
                 * defined but not in the workset need free registers */
                if (get_irn_mode(node) == mode_T) {
                        const ir_edge_t *edge;

                        /* multi-result node: its values are the Proj users */
                        foreach_out_edge(node, edge) {
                                ir_node *proj = get_edge_src_irn(edge);
                                if (!arch_irn_consider_in_reg_alloc(cls, proj))
                                        continue;
                                if (worklist_contains(proj)) {
                                        worklist_remove(worklist, proj);
                                } else {
                                        ++n_defs;
                                }
                        }
                } else if (arch_irn_consider_in_reg_alloc(cls, node)) {
                        if (worklist_contains(node)) {
                                worklist_remove(worklist, node);
                        } else {
                                n_defs = 1;
                        }
                }

                /* make sure we have enough free registers for the spills */
                make_room(worklist, n_defs);

                /* put all values used by the instruction into the workset */
                arity = get_irn_arity(node);
                for(i = 0; i < arity; ++i) {
                        ir_node *use = get_irn_n(node, i);

                        if (!arch_irn_consider_in_reg_alloc(cls, use))
                                continue;

                        val_used(worklist, use, node);
                }

                /* we might have too many values in the worklist now and need to spill
                 * some */
                make_room(worklist, 0);

#ifdef DEBUG_libfirm
                print_worklist(worklist, LEVEL_2);
                DB((dbg, LEVEL_2, "\n"));
#endif
        }

        update_max_pressure(worklist);

#ifdef DEBUG_libfirm
        DB((dbg, LEVEL_1, "worklist at begin of %+F:", block));
        print_worklist(worklist, LEVEL_1);
        DB((dbg, LEVEL_1, "\n"));
#endif
}
533
534 static bool worklists_equal(const worklist_t *wl1, const worklist_t *wl2)
535 {
536         const struct list_head *i1 = &wl1->live_values;
537         const struct list_head *i2 = &wl2->live_values;
538
539         for ( ; i1 != &wl1->live_values && i2 != &wl2->live_values;
540                         i1 = i1->next, i2 = i2->next) {
541                 worklist_entry_t *entry1 = list_entry(i1, worklist_entry_t, head);
542                 worklist_entry_t *entry2 = list_entry(i2, worklist_entry_t, head);
543
544                 if (entry1->value != entry2->value)
545                         return false;
546         }
547         /* both lists at the end */
548         if (i1 != &wl1->live_values || i2 != &wl2->live_values)
549                 return false;
550
551         return true;
552 }
553
554 static bool fill_start_worklist(worklist_t *new_worklist, ir_node *block)
555 {
556         double           best_execfreq   = -1;
557         worklist_t      *best_worklist   = NULL;
558         ir_node         *best_succ_block = NULL;
559         int              best_pos;
560         const ir_edge_t *edge;
561
562         /* construct worklist */
563         foreach_block_succ(block, edge) {
564                 ir_node      *succ_block = get_edge_src_irn(edge);
565                 double       execfreq    = get_block_execfreq(exec_freq, succ_block);
566                 block_info_t *block_info;
567                 worklist_t   *succ_worklist;
568
569                 if (execfreq < best_execfreq)
570                         continue;
571
572                 block_info    = get_block_info(succ_block);
573                 succ_worklist = block_info->start_worklist;
574
575                 if (succ_worklist == NULL || succ_worklist->visited >= worklist_visited)
576                         continue;
577
578                 best_execfreq   = execfreq;
579                 best_worklist   = succ_worklist;
580                 best_succ_block = succ_block;
581                 best_pos        = get_edge_src_pos(edge);
582         }
583
584         if (best_worklist == NULL)
585                 return false;
586         best_worklist->visited = worklist_visited;
587
588         fill_and_activate_worklist(new_worklist, best_worklist, block,
589                         best_succ_block, best_pos);
590         return true;
591 }
592
593 static worklist_t *construct_start_worklist(ir_node *block)
594 {
595         worklist_t *worklist = new_worklist();
596
597         ++worklist_visited;
598
599         while(fill_start_worklist(worklist, block)) {
600                 if (worklist->n_live_values >= n_regs)
601                         break;
602         }
603
604         return worklist;
605 }
606
/**
 * Compute spills/reloads for one block: construct its end-of-block worklist
 * from the successors, run do_spilling() backwards through the block and
 * store the resulting start/end worklists in the block info.
 */
static void process_block(ir_node *block, void *env)
{
        block_info_t    *block_info;
        worklist_t      *worklist;
        int              n_preds;

        (void) env;
        DB((dbg, LEVEL_1, "Processing %+F\n", block));

        worklist   = construct_start_worklist(block);
        block_info = get_block_info(block);

#ifdef DEBUG_libfirm
        DB((dbg, LEVEL_1, "worklist at end of %+F:", block));
        print_worklist(worklist, LEVEL_1);
        DB((dbg, LEVEL_1, "\n"));

        /* once a fixpoint is reached the worklists must not change anymore */
        if (should_have_reached_fixpoint) {
                assert(worklists_equal(block_info->end_worklist, worklist));
        }
#endif
        /* keep a copy: do_spilling() mutates the worklist in place */
        block_info->end_worklist = duplicate_worklist(worklist);

        do_spilling(block, worklist);
        deactivate_worklist(worklist);

#ifdef DEBUG_libfirm
        if (should_have_reached_fixpoint) {
                assert(worklists_equal(block_info->start_worklist, worklist));
        }
#endif
        block_info->start_worklist = worklist;

        /* we shouldn't have any live values left at the start block */
        /* n_preds is only consumed by the disabled assert below */
        n_preds = get_Block_n_cfgpreds(block);
        //assert(n_preds != 0 || worklist->n_live_values == 0);
}
644
typedef union block_or_loop_t block_or_loop_t;
/** Work item of the loop walker: either a single block or a whole sub-loop.
 *  Discriminated by reading the kind field of the pointed-to entity
 *  (see process_block_or_loop()). */
union block_or_loop_t {
        ir_node *block;
        ir_loop *loop;
};

/** Flexible array collecting the postorder of blocks/loops (see find_blocks()). */
static block_or_loop_t *loop_blocks;
/** The loop whose blocks are currently being collected/processed. */
static ir_loop         *current_loop;

static void find_blocks(ir_node *block);
655
656 static void find_in_loop(ir_loop *loop, ir_node *entry)
657 {
658         loop_info_t     *loop_info = get_loop_info(loop);
659         block_or_loop_t block_or_loop;
660         loop_edge_t     *edge;
661
662         /* simply mark 1 block in the loop to indicate that the loop was already
663          * processed */
664         ir_node *some_block = loop_info->entry_edges->block;
665         if (Block_block_visited(some_block))
666                 return;
667
668         block_or_loop.loop = loop;
669         ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);
670
671 #ifndef NDEBUG
672         {
673                 /* we should be 1 of the entry blocks */
674                 loop_edge_t *edge  = loop_info->entry_edges;
675                 bool         found = false;
676                 for ( ; edge != NULL; edge = edge->next) {
677                         if (edge->block == entry) {
678                                 found = true;
679                                 break;
680                         }
681                 }
682                 assert(found);
683         }
684 #else
685         (void) entry;
686 #endif
687
688         /* check all loop successors */
689         for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
690                 ir_node *succ      = edge->block;
691                 ir_loop *succ_loop = get_irn_loop(succ);
692
693                 if (succ_loop == current_loop) {
694                         find_blocks(succ);
695                 } else {
696                         assert(get_loop_depth(succ_loop) < get_loop_depth(current_loop));
697                 }
698         }
699 }
700
/**
 * Depth-first collect all blocks of current_loop reachable from @p block
 * into loop_blocks; sub-loops encountered on the way are appended as a
 * single loop item via find_in_loop().
 */
static void find_blocks(ir_node *block)
{
        const ir_edge_t *edge;
        block_or_loop_t block_or_loop;

        if (Block_block_visited(block))
                return;

        block_or_loop.block = block;

        ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);
        mark_Block_block_visited(block);

        foreach_block_succ(block, edge) {
                ir_node *succ = get_edge_src_irn(edge);

                /* is it part of the current loop? */
                ir_loop *loop = get_irn_loop(succ);
                if (loop != current_loop) {
                        /* a sub-loop? */
                        if (get_loop_depth(loop) > get_loop_depth(current_loop)) {
                                find_in_loop(loop, succ);
                        } else {
                                /* parent loop: we're not interested in the block */
                        }
                } else {
                        find_blocks(succ);
                }
        }
}
731
732 /**
733  * append an entry to a worklist. WARNING: The entry must not already be in the
734  * worklist.
735  */
736 static void worklist_append(worklist_t *worklist, ir_node *value,
737                             ir_node *reload_point,
738                                                         ir_loop *unused_livethrough_loop)
739 {
740         worklist_entry_t *entry;
741
742 #ifdef EXPENSIVE_CHECKS
743         foreach_worklist(entry, worklist) {
744                 assert(entry->value != value);
745         }
746 #endif
747
748         entry = obstack_alloc(&obst, sizeof(*entry));
749         memset(entry, 0, sizeof(entry[0]));
750
751         entry->value                   = value;
752         entry->reload_point            = reload_point;
753         entry->unused_livethrough_loop = unused_livethrough_loop;
754         list_add_tail(&entry->head, &worklist->live_values);
755         ++worklist->n_live_values;
756         assert(worklist->n_live_values <= n_regs);
757 }
758
/**
 * Push an unused livethrough @p value out of the loop described by
 * @p loop_info: append it to the end worklists of all blocks before the loop
 * exits and to the start worklists of all loop entry blocks. The
 * worklist_visited generation guards against appending twice to the same
 * worklist.
 */
static void push_unused_livethrough(loop_info_t *loop_info, ir_node *value)
{
        loop_edge_t *edge;
        ++worklist_visited;

        /* add the value to all loop exit and entry blocks */
        for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
                ir_node            *block
                        = get_Block_cfgpred_block(edge->block, edge->pos);
                const block_info_t *info     = get_block_info(block);
                worklist_t         *worklist = info->end_worklist;
                ir_node            *reload_point = NULL;

                if (worklist->visited >= worklist_visited)
                        continue;
                worklist->visited = worklist_visited;

                /* TODO: we need a smarter mechanism here, that makes the reloader place
                 * reload nodes on all loop exits... */

                worklist_append(worklist, value, reload_point, loop_info->loop);
        }
        edge = loop_info->entry_edges;
        for ( ; edge != NULL; edge = edge->next) {
                ir_node            *entry_block = edge->block;
                const block_info_t *info        = get_block_info(entry_block);
                worklist_t         *worklist    = info->start_worklist;
                ir_node            *pred_block;
                ir_node            *reload_point;

                if (worklist->visited >= worklist_visited)
                        continue;
                worklist->visited = worklist_visited;

                /* before an entry the value has to be reloaded at the end of
                 * the predecessor block */
                pred_block   = get_Block_cfgpred_block(entry_block, edge->pos);
                reload_point = be_get_end_of_block_insertion_point(pred_block);

                worklist_append(worklist, value, reload_point, loop_info->loop);
        }

        set_irn_link(value, NULL);
        ++loop_info->max_register_pressure;
}
802
/**
 * Find values that live through the loop of @p loop_info without being used
 * inside it and push them out of the loop, as long as enough registers stay
 * free (max_register_pressure < n_regs).
 */
static void push_unused_livethroughs(loop_info_t *loop_info)
{
        loop_edge_t *edge;

        /* we can only push unused livethroughs if register pressure inside the loop
         * was low enough */
        if (loop_info->max_register_pressure >= n_regs)
                return;

        /* find unused livethroughs: register pressure in the loop was low enough
         * which means that we had no spills which implies that at every point in
         * the loop all*/
        for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
                ir_node            *block          = edge->block;
                const block_info_t *info           = get_block_info(block);
                worklist_t         *start_worklist = info->start_worklist;
                ir_node            *exit_block;
                const block_info_t *exit_info;
                worklist_t         *end_worklist;
                struct list_head   *entry;

                if (start_worklist == NULL)
                        continue;

                /* the block inside the loop the exit edge comes from */
                exit_block   = get_Block_cfgpred_block(edge->block, edge->pos);
                exit_info    = get_block_info(exit_block);
                end_worklist = exit_info->end_worklist;

                activate_worklist(end_worklist);
                /* all values contained in the start_worklist, which are not available
                 * in the end_worklist, must be unused livethroughs */

                list_for_each(entry, &start_worklist->live_values) {
                        worklist_entry_t *wl_entry
                                = list_entry(entry, worklist_entry_t, head);
                        ir_node *value = wl_entry->value;

                        /* pushing raises the loop pressure; stop when full */
                        if (loop_info->max_register_pressure >= n_regs)
                                break;


                        if (!worklist_contains(value)) {
                                /* add it to all loop exits */
                                DB((dbg, LEVEL_2, "Found unused livethrough: %+F (regpressure in loop %d)\n", value, loop_info->max_register_pressure));
                                push_unused_livethrough(loop_info, value);
                                /* the value should now be part of end_worklist, so mark it */
                                mark_irn_visited(value);
                        }
                }

                deactivate_worklist(end_worklist);
        }
}
856
/**
 * Process one postorder item: for a loop, propagate its maximum register
 * pressure (and optionally push its unused livethroughs); for a block, run
 * the spilling algorithm on it.
 */
static void process_block_or_loop(const block_or_loop_t block_or_loop)
{
        /* NOTE(review): the union is discriminated by reading the kind field
         * through the loop member; presumably ir_node and ir_loop share a
         * common initial kind field -- confirm before changing this */
        if (block_or_loop.loop->kind == k_ir_loop) {
                loop_info_t *loop_info = get_loop_info(block_or_loop.loop);

                if (do_push_unused_livethroughs)
                        push_unused_livethroughs(loop_info);

                if (loop_info->max_register_pressure > max_register_pressure)
                        max_register_pressure = loop_info->max_register_pressure;

                return;
        }
        process_block(block_or_loop.block, NULL);
}
872
/**
 * Run the spill passes over a single loop (recursing into sub-loops
 * first).
 *
 * A postorder list of the loop's direct blocks and sub-loops is built
 * starting from one of the loop's entry blocks, then the list is walked
 * in reverse several times:
 *   run 1+2: tentative runs to compute and stabilize the worksets,
 *   run 3:   debug-build-only tentative run asserting a fixpoint was
 *            reached,
 *   run 4:   the real run, which records spills/reloads and pushes
 *            unused livethrough values out of the loop.
 * Finally the maximal register pressure observed is stored in the
 * loop's loop_info.
 */
static void process_loop(ir_loop *loop)
{
	int         n_elements = get_loop_n_elements(loop);
	int         i, len;
	loop_info_t *loop_info;
	loop_edge_t *edge;
	ir_node     *some_block;

	/* first handle all sub-loops */
	for (i = 0; i < n_elements; ++i) {
		loop_element element = get_loop_element(loop, i);
		if (*element.kind != k_ir_loop)
			continue;

		process_loop(element.son);
	}

	/* create a postorder of the blocks */
	loop_info = get_loop_info(loop);
	edge      = loop_info->entry_edges;
	if (edge != NULL) {
		some_block = edge->block;
	} else {
		/* only the outermost (whole-graph) loop has no entry edges;
		 * start the search at the graph's start block then */
		assert(loop == get_irg_loop(current_ir_graph));
		some_block = get_irg_start_block(current_ir_graph);
	}

	loop_blocks  = NEW_ARR_F(block_or_loop_t, 0);
	current_loop = loop;

	ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);
	inc_irg_block_visited(current_ir_graph);
	find_blocks(some_block);
	/* for endless loops the end-block might be unreachable */
	if (loop == get_irg_loop(current_ir_graph)) {
		find_blocks(get_irg_end_block(current_ir_graph));
	}
	ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);

	/* dump the computed block list for debugging */
	DB((dbg, LEVEL_3, "Block List for loop %p\n", loop));
	len = ARR_LEN(loop_blocks);
	for (i = 0; i < len; ++i) {
		block_or_loop_t block_or_loop = loop_blocks[i];
		if (block_or_loop.loop->kind == k_ir_loop) {
			DB((dbg, LEVEL_3, " L-%p", block_or_loop.loop));
		} else {
			DB((dbg, LEVEL_3, " B-%+F", block_or_loop.block));
		}
	}
	DB((dbg, LEVEL_3, "\n"));

	/* reset; process_block_or_loop() accumulates the maximum into this
	 * global while we walk the list below */
	max_register_pressure = 0;

	/* run1: tentative phase */
	tentative_mode = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(loop_blocks[i]);
	}

	/* run2: tentative phase - should reach fixpoint */
	tentative_mode = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(loop_blocks[i]);
	}

#ifndef NDEBUG
	/* run3: tentative phase - check fixpoint */
	tentative_mode               = true;
	should_have_reached_fixpoint = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(loop_blocks[i]);
	}
	should_have_reached_fixpoint = false;
#endif

	/* run4: add spills/reloads */
	tentative_mode              = false;
	do_push_unused_livethroughs = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(loop_blocks[i]);
	}
	do_push_unused_livethroughs = false;

	loop_info->max_register_pressure = max_register_pressure;
	DB((dbg, LEVEL_2, "Regpressure in loop %p: %u\n", loop,
				(unsigned) max_register_pressure));

	DEL_ARR_F(loop_blocks);
}
962
963 static void fix_block_borders(ir_node *block, void *data)
964 {
965         block_info_t *block_info     = get_block_info(block);
966         worklist_t   *start_worklist = block_info->start_worklist;
967         int           n_cfgpreds     = get_Block_n_cfgpreds(block);
968         ir_loop      *loop           = get_irn_loop(block);
969         int           i;
970
971         (void) data;
972         assert(start_worklist != NULL);
973
974         for (i = 0; i < n_cfgpreds; ++i) {
975                 ir_node      *pred_block      = get_Block_cfgpred_block(block, i);
976                 block_info_t *pred_block_info = get_block_info(pred_block);
977                 worklist_t   *end_worklist    = pred_block_info->end_worklist;
978                 ir_loop      *pred_loop       = get_irn_loop(pred_block);
979                 bool          is_loop_entry   = false;
980                 worklist_entry_t *entry;
981
982                 assert(end_worklist != NULL);
983
984                 if (get_loop_depth(pred_loop) < get_loop_depth(loop)) {
985                         is_loop_entry = true;
986                 }
987
988                 /* reload missing values */
989                 activate_worklist(end_worklist);
990
991                 foreach_worklist(entry, start_worklist) {
992                         ir_node *value = entry->value;
993
994                         if (!is_loop_entry && entry->unused_livethrough_loop != NULL)
995                                 continue;
996
997                         if (is_Phi(value) && get_nodes_block(value) == block) {
998                                 value = get_irn_n(value, i);
999
1000                                 /* we might have unknowns as argument for the phi */
1001                                 if (!arch_irn_consider_in_reg_alloc(cls, value))
1002                                         continue;
1003                         }
1004                         if (worklist_contains(value))
1005                                 continue;
1006                         be_add_reload_on_edge(senv, value, block, i, cls, 1);
1007                 }
1008
1009                 deactivate_worklist(end_worklist);
1010         }
1011 }
1012
1013 /**
1014  * Run the spill algorithm.
1015  *
1016  * @param birg  the graph to run on
1017  * @param ncls  the register class to run on
1018  */
1019 static void be_spill_belady3(be_irg_t *birg, const arch_register_class_t *ncls)
1020 {
1021         ir_graph *irg = be_get_birg_irg(birg);
1022
1023         cls    = ncls;
1024         n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
1025
1026         /* shortcut for register classes with ignore regs only */
1027         if (n_regs == 0)
1028                 return;
1029
1030         worklist_visited = 0;
1031         exec_freq        = be_get_birg_exec_freq(birg);
1032
1033         be_clear_links(irg);
1034         ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
1035                         | IR_RESOURCE_LOOP_LINK);
1036         inc_irg_visited(irg);
1037
1038         obstack_init(&obst);
1039         senv = be_new_spill_env(birg);
1040
1041         assure_cf_loop(irg);
1042         clear_loop_info(get_irg_loop(irg));
1043         irg_block_walk_graph(irg, construct_loop_edges, NULL, NULL);
1044
1045         process_loop(get_irg_loop(irg));
1046
1047         irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);
1048
1049         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
1050                         | IR_RESOURCE_LOOP_LINK);
1051
1052         be_insert_spills_reloads(senv);
1053
1054         obstack_free(&obst, NULL);
1055
1056         /* clean up */
1057         be_delete_spill_env(senv);
1058 }
1059
1060 void be_init_spillbelady3(void)
1061 {
1062         static be_spiller_t belady3_spiller = {
1063                 be_spill_belady3
1064         };
1065
1066         be_register_spiller("belady3", &belady3_spiller);
1067         FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady3");
1068 }
1069
1070 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady3);