- fixed comment: bs cannot be NULL anymore (and was never NULL previously)
[libfirm] / ir / be / bespillbelady3.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       MIN-algorithm with some twists (aka belady spiller v3)
23  * @author      Matthias Braun
24  * @date        15.01.2008
25  * @version     $Id$
26  *
27  * TODO:
28  *   - handle phis correctly, decide whether we should spill them ~ok
29  *   - merge multiple start worksets of blocks ~ok
30  *   - how and when to do the tentative phase...
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include <stdbool.h>
37
38 #include "debug.h"
39 #include "list.h"
40 #include "pdeq.h"
41
42 #include "irnode_t.h"
43 #include "irprintf.h"
44 #include "iredges_t.h"
45 #include "irloop_t.h"
46 #include "irgwalk.h"
47 #include "execfreq.h"
48
49 #include "bemodule.h"
50 #include "bespill.h"
51 #include "beutil.h"
52 #include "bespilloptions.h"
53 #include "besched_t.h"
54 #include "be_t.h"
55
56 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
57
58 typedef struct loop_edge_t loop_edge_t;
59 struct loop_edge_t {
60         ir_node     *block;
61         int          pos;
62         loop_edge_t *next;
63 };
64
65 typedef struct loop_info_t loop_info_t;
66 struct loop_info_t {
67         ir_loop     *loop;
68         loop_edge_t *entry_edges;
69         loop_edge_t *exit_edges;
70         size_t       max_register_pressure;
71 };
72
73 typedef struct worklist_entry_t worklist_entry_t;
74 struct worklist_entry_t {
75         struct list_head  head;
76         ir_node          *value;
77         ir_node          *reload_point;
78         ir_loop          *unused_livethrough_loop;
79 };
80
81 typedef struct worklist_t worklist_t;
82 struct worklist_t {
83         struct list_head  live_values;
84         size_t            n_live_values;
85 };
86
87 typedef struct block_info_t block_info_t;
88 struct block_info_t {
89         worklist_t *start_worklist;
90         worklist_t *end_worklist;
91 };
92
93 static const arch_env_t            *arch_env;
94 static const arch_register_class_t *cls;
95 static struct obstack               obst;
96 static spill_env_t                 *senv;
97 static size_t                       n_regs;
98 static size_t                       max_register_pressure;
99 static bool                         tentative_mode;
100 static bool                         should_have_reached_fixpoint;
101 static ir_exec_freq                *exec_freq;
102
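/**
 * Allocates a new, empty worklist on the spiller's obstack.
 */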
103 static worklist_t *new_worklist(void)
104 {
105         worklist_t *worklist = obstack_alloc(&obst, sizeof(worklist[0]));
106         memset(worklist, 0, sizeof(worklist[0]));
107
108         INIT_LIST_HEAD(&worklist->live_values);
109         worklist->n_live_values    = 0;
110
111         return worklist;
112 }
113
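/**
 * Clears the visited mark of a node. The visited flag is used here to encode
 * membership in the currently activated worklist.
 */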
114 static void mark_irn_not_visited(ir_node *node)
115 {
116         set_irn_visited(node, get_irg_visited(current_ir_graph) - 1);
117 }
118
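/**
 * Returns true if the value is part of the currently activated worklist
 * (encoded via the visited flag).
 */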
119 static bool worklist_contains(const ir_node *node)
120 {
121         return irn_visited(node);
122 }
123
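/**
 * Returns the block_info_t of a block, allocating and zero-initializing it on
 * first use. The info is stored in the block's link field.
 */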
124 static block_info_t *get_block_info(ir_node *block)
125 {
126         block_info_t *info = get_irn_link(block);
127         if (info != NULL)
128                 return info;
129
130         info = obstack_alloc(&obst, sizeof(info[0]));
131         memset(info, 0, sizeof(info[0]));
132         set_irn_link(block, info);
133         return info;
134 }
135
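/**
 * Clears the visited marks and link fields of all values in the worklist,
 * undoing a previous activate_worklist()/duplicate_activate_worklist().
 */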
136 static void deactivate_worklist(const worklist_t *worklist)
137 {
138         struct list_head *entry;
139
140         list_for_each(entry, &worklist->live_values) {
141                 worklist_entry_t *wl_entry
142                         = list_entry(entry, worklist_entry_t, head);
143                 assert(worklist_contains(wl_entry->value));
144                 mark_irn_not_visited(wl_entry->value);
145                 set_irn_link(wl_entry->value, NULL);
146         }
147 }
148
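/**
 * Marks all values of the worklist as visited and lets their link fields
 * point to the corresponding worklist entries, so worklist_contains() and
 * lookups via get_irn_link() work.
 */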
149 static void activate_worklist(const worklist_t *worklist)
150 {
151         struct list_head *entry;
152
153         list_for_each(entry, &worklist->live_values) {
154                 worklist_entry_t *wl_entry
155                         = list_entry(entry, worklist_entry_t, head);
156                 assert(!worklist_contains(wl_entry->value));
157                 mark_irn_visited(wl_entry->value);
158                 set_irn_link(wl_entry->value, wl_entry);
159         }
160 }
161
162 /**
163  * duplicates a worklist and directly activates it
164  */
165 static worklist_t *duplicate_activate_worklist(const worklist_t *worklist,
166                 ir_node *block, ir_node *succ_block, int succ_pos)
167 {
168         ir_node          *reload_point  = NULL;
169         size_t            n_live_values = 0;
170         worklist_t       *new_worklist;
171         struct list_head *entry;
172
173         if (succ_block != NULL &&
174                         (get_Block_n_cfgpreds(succ_block) > 1
175                          || get_irn_n_edges_kind(block, EDGE_KIND_BLOCK) > 1)) {
176                 reload_point = be_get_end_of_block_insertion_point(block);
177         }
178
179         new_worklist = obstack_alloc(&obst, sizeof(new_worklist[0]));
180         INIT_LIST_HEAD(&new_worklist->live_values);
181
182         list_for_each(entry, &worklist->live_values) {
183                 worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
184                 ir_node          *value     = wl_entry->value;
185                 worklist_entry_t *new_entry;
186
187                 if (is_Phi(value) && get_nodes_block(value) == succ_block) {
188                         value = get_Phi_pred(value, succ_pos);
189
190                         /* can happen for unknown phi preds */
191                         if (!arch_irn_consider_in_reg_alloc(arch_env, cls, value))
192                                 continue;
193                 }
194
195                 new_entry = obstack_alloc(&obst, sizeof(new_entry[0]));
196                 memset(new_entry, 0, sizeof(new_entry[0]));
197
198                 new_entry->value = value;
199                 if (reload_point != NULL) {
200                         new_entry->reload_point = reload_point;
201                 } else {
202                         new_entry->reload_point = wl_entry->reload_point;
203                 }
204
205                 if (irn_not_visited(value)) {
206                         list_add_tail(&new_entry->head, &new_worklist->live_values);
207                         ++n_live_values;
208
209                         mark_irn_visited(value);
210                         set_irn_link(value, new_entry);
211                 }
212         }
213         new_worklist->n_live_values = n_live_values;
214
215         return new_worklist;
216 }
217
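/**
 * Duplicates a worklist without activating it; entries are copied verbatim.
 */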
218 static worklist_t *duplicate_worklist(const worklist_t *worklist)
219 {
220         worklist_t       *new_worklist;
221         struct list_head *entry;
222
223         new_worklist = obstack_alloc(&obst, sizeof(new_worklist[0]));
224         INIT_LIST_HEAD(&new_worklist->live_values);
225         new_worklist->n_live_values = worklist->n_live_values;
226
227         list_for_each(entry, &worklist->live_values) {
228                 worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
229                 worklist_entry_t *new_entry
230                         = obstack_alloc(&obst, sizeof(new_entry[0]));
231
232                 memcpy(new_entry, wl_entry, sizeof(new_entry[0]));
233                 list_add_tail(&new_entry->head, &new_worklist->live_values);
234         }
235
236         return new_worklist;
237 }
238
239 #ifdef DEBUG_libfirm
240 static void print_worklist(const worklist_t *worklist, int level)
241 {
242         struct list_head *entry;
243
244         DB((dbg, level, "%d values: ", worklist->n_live_values));
245         list_for_each(entry, &worklist->live_values) {
246                 worklist_entry_t *wl_entry
247                         = list_entry(entry, worklist_entry_t, head);
248
249                 DB((dbg, level, "%+F ", wl_entry->value));
250         }
251 }
252 #endif
253
254
255
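/**
 * Recursively clears the link fields of a loop and all of its child loops,
 * so get_loop_info() starts from a clean state.
 */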
256 static void clear_loop_info(ir_loop *loop)
257 {
258         int n_elements = get_loop_n_elements(loop);
259         int i;
260
261         loop->link = NULL;
262         for (i = 0; i < n_elements; ++i) {
263                 loop_element element = get_loop_element(loop, i);
264                 if (*element.kind != k_ir_loop)
265                         continue;
266
267                 clear_loop_info(element.son);
268         }
269 }
270
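/**
 * Returns the loop_info_t of a loop, allocating it on first use. The info is
 * stored in the loop's link field.
 */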
271 static loop_info_t *get_loop_info(ir_loop *loop)
272 {
273         loop_info_t *info = loop->link;
274         if (info != NULL)
275                 return info;
276
277         info = obstack_alloc(&obst, sizeof(info[0]));
278         memset(info, 0, sizeof(info[0]));
279         info->loop = loop;
280         loop->link = info;
281         return info;
282 }
283
284 /**
285  * constructs loop in+out edges when called in a block walker
286  */
287 static void construct_loop_edges(ir_node *block, void *data)
288 {
289         int      n_cfgpreds = get_Block_n_cfgpreds(block);
290         ir_loop *loop       = get_irn_loop(block);
291         int      i;
292
293         (void) data;
294
295         for (i = 0; i < n_cfgpreds; ++i) {
296                 ir_node     *cfgpred_block = get_Block_cfgpred_block(block, i);
297                 ir_loop     *cfgpred_loop  = get_irn_loop(cfgpred_block);
298                 loop_edge_t *edge;
299                 bool        is_exit_edge;
300                 ir_loop     *l, *goal;
301
302                 if (cfgpred_loop == loop)
303                         continue;
304
305                 /* critical edges are split, so we can't jump from one loop
306                  * directly into another without leaving it first */
307                 assert(get_loop_depth(cfgpred_loop) != get_loop_depth(loop));
308
309                 /* edge out of loop */
310                 if (get_loop_depth(cfgpred_loop) > get_loop_depth(loop)) {
311                         is_exit_edge = true;
312                         l            = cfgpred_loop;
313                         goal         = loop;
314                 } else {
315                         is_exit_edge = false;
316                         l            = loop;
317                         goal         = cfgpred_loop;
318                 }
319
320                 /* this might be a jump out/in multiple loops */
321                 do {
322                         loop_info_t *l_info = get_loop_info(l);
323
324                         edge = obstack_alloc(&obst, sizeof(edge[0]));
325                         memset(edge, 0, sizeof(edge[0]));
326                         edge->block = block;
327                         edge->pos   = i;
328
329                         if (is_exit_edge) {
330                                 DB((dbg, LEVEL_2, "Loop %p exit edge %+F:%d\n",
331                                     l, block, i));
332                                 edge->next         = l_info->exit_edges;
333                                 l_info->exit_edges = edge;
334                                 assert(get_loop_depth(loop) < get_loop_depth(l));
335                         } else {
336                                 DB((dbg, LEVEL_2, "Loop %p entry edge %+F:%d\n",
337                                     l, block, i));
338                                 edge->next          = l_info->entry_edges;
339                                 l_info->entry_edges = edge;
340                                 assert(get_loop_depth(cfgpred_loop) < get_loop_depth(l));
341                         }
342                         l = get_loop_outer_loop(l);
343                 } while (l != goal);
344         }
345 }
346
347
348
349
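/**
 * Requests a reload of the given value before its recorded reload point
 * (does nothing during the tentative phases).
 */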
350 static void place_reload(worklist_entry_t *entry)
351 {
352         if (tentative_mode)
353                 return;
354
355         DB((dbg, LEVEL_1, "reload of %+F before %+F\n", entry->value,
356                 entry->reload_point));
357         assert(entry->reload_point != NULL);
358         be_add_reload(senv, entry->value, entry->reload_point, cls, 1);
359         entry->reload_point = NULL;
360 }
361
362 /**
363  * makes sure the worklist contains no more than n_regs - room_needed entries
364  */
365 static void make_room(worklist_t *worklist, size_t room_needed)
366 {
367         int               i;
368         int               spills_needed;
369         struct list_head *entry;
370
371         spills_needed = (int)worklist->n_live_values + (int)room_needed - (int)n_regs;
372         if (spills_needed <= 0)
373                 return;
374
375         entry = worklist->live_values.next;
376         for (i = spills_needed; i > 0; --i) {
377                 struct list_head *next = entry->next;
378                 worklist_entry_t *wl_entry
379                         = list_entry(entry, worklist_entry_t, head);
380
381                 assert(worklist_contains(wl_entry->value));
382                 mark_irn_not_visited(wl_entry->value);
383                 place_reload(wl_entry);
384                 list_del(entry);
385
386                 entry = next;
387         }
388         worklist->n_live_values -= spills_needed;
389 }
390
391 /**
392  * a value was used, so bring it to the back of the worklist (which might
393  * result in a spill of another value).
394  */
395 static void val_used(worklist_t *worklist, ir_node *value, ir_node *sched_point)
396 {
397         /* already in the worklist? Then move it to the back, otherwise add it at the back */
398         worklist_entry_t *entry = get_irn_link(value);
399
400         assert(arch_irn_consider_in_reg_alloc(arch_env, cls, value));
401
402         if (worklist_contains(value)) {
403                 assert(entry != NULL);
404
405                 list_del(&entry->head);
406         } else {
407                 if (entry == NULL) {
408                         entry        = obstack_alloc(&obst, sizeof(entry[0]));
409                         memset(entry, 0, sizeof(entry[0]));
410
411                         entry->value = value;
412                         set_irn_link(value, entry);
413                 }
414
415                 ++worklist->n_live_values;
416                 mark_irn_visited(value);
417         }
418
419         entry->reload_point = sched_point;
420         list_add_tail(&entry->head, &worklist->live_values);
421 }
422
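/**
 * Removes a value from the worklist and clears its visited mark.
 */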
423 static void worklist_remove(worklist_t *worklist, ir_node *value)
424 {
425         worklist_entry_t *entry = get_irn_link(value);
426         DB((dbg, LEVEL_2, "removing %+F\n", value));
427         assert(entry != NULL);
428         list_del(&entry->head);
429         --worklist->n_live_values;
430
431         assert(worklist_contains(value));
432         mark_irn_not_visited(value);
433 }
434
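/**
 * Remembers the maximum register pressure seen so far.
 */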
435 static void update_max_pressure(worklist_t *worklist)
436 {
437         if (worklist->n_live_values > max_register_pressure)
438                 max_register_pressure = worklist->n_live_values;
439 }
440
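/**
 * Walks the schedule of a block backwards and simulates which values can be
 * kept in registers (the worklist). Values that do not fit are removed from
 * the worklist and a reload is requested before their recorded reload point.
 */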
441 static void do_spilling(ir_node *block, worklist_t *worklist)
442 {
443         ir_node *node;
444
445         assert(worklist != NULL);
446
447         sched_foreach_reverse(block, node) {
448                 int    i, arity;
449                 size_t n_defs = 0;
450
451                 DB((dbg, LEVEL_2, "\t%+F... ", node));
452                 update_max_pressure(worklist);
453
454                 if (is_Phi(node)) {
455                         ir_node *node2;
456                         /* TODO: if we have some free registers, then we could decide not
457                          * to spill some phis (but not for phis where at least one input
458                          * is the phi itself) */
459
460                         /* we have to spill all phis that are not live */
461                         sched_foreach_reverse_from(node, node2) {
462                                 assert(is_Phi(node2));
463
464                                 if (worklist_contains(node2))
465                                         continue;
466                                 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node2))
467                                         continue;
468
469                                 if (!tentative_mode)
470                                         be_spill_phi(senv, node2);
471                         }
472                         DB((dbg, LEVEL_2, "\n"));
473                         break;
474                 }
475
476                 /* remove values defined by this instruction from the worklist. Values
477                  * defined but not in the worklist need free registers */
478                 if (get_irn_mode(node) == mode_T) {
479                         const ir_edge_t *edge;
480
481                         foreach_out_edge(node, edge) {
482                                 ir_node *proj = get_edge_src_irn(edge);
483                                 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, proj))
484                                         continue;
485                                 if (worklist_contains(proj)) {
486                                         worklist_remove(worklist, proj);
487                                 } else {
488                                         ++n_defs;
489                                 }
490                         }
491                 } else if (arch_irn_consider_in_reg_alloc(arch_env, cls, node)) {
492                         if (worklist_contains(node)) {
493                                 worklist_remove(worklist, node);
494                         } else {
495                                 n_defs = 1;
496                         }
497                 }
498
499                 /* make sure we have enough free registers for the spills */
500                 make_room(worklist, n_defs);
501
502                 /* put all values used by the instruction into the worklist */
503                 arity = get_irn_arity(node);
504                 for (i = 0; i < arity; ++i) {
505                         ir_node *use = get_irn_n(node, i);
506
507                         if (!arch_irn_consider_in_reg_alloc(arch_env, cls, use))
508                                 continue;
509
510                         val_used(worklist, use, node);
511                 }
512
513                 /* we might have too many values in the worklist now and need to spill
514                  * some */
515                 make_room(worklist, 0);
516
517 #ifdef DEBUG_libfirm
518                 print_worklist(worklist, LEVEL_2);
519                 DB((dbg, LEVEL_2, "\n"));
520 #endif
521         }
522
523         update_max_pressure(worklist);
524
525 #ifdef DEBUG_libfirm
526         DB((dbg, LEVEL_1, "worklist at begin of %+F:", block));
527         print_worklist(worklist, LEVEL_1);
528         DB((dbg, LEVEL_1, "\n"));
529 #endif
530 }
531
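/**
 * Returns true if both worklists contain the same values in the same order.
 */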
532 static bool worklists_equal(const worklist_t *wl1, const worklist_t *wl2)
533 {
534         const struct list_head *i1 = wl1->live_values.next;
535         const struct list_head *i2 = wl2->live_values.next;
536
537         for ( ; i1 != &wl1->live_values && i2 != &wl2->live_values;
538                         i1 = i1->next, i2 = i2->next) {
539                 worklist_entry_t *entry1 = list_entry(i1, worklist_entry_t, head);
540                 worklist_entry_t *entry2 = list_entry(i2, worklist_entry_t, head);
541
542                 if (entry1->value != entry2->value)
543                         return false;
544         }
545         /* both lists at the end */
546         if (i1 != &wl1->live_values || i2 != &wl2->live_values)
547                 return false;
548
549         return true;
550 }
551
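/**
 * Processes a single block: the end worklist is taken from the most
 * frequently executed successor that already has a start worklist (or starts
 * out empty if none exists yet), do_spilling() is run over the block and the
 * resulting start/end worklists are stored in the block info.
 */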
552 static void process_block(ir_node *block, void *env)
553 {
554         block_info_t    *block_info      = NULL;
555         worklist_t      *worklist        = NULL;
556         double           best_execfreq   = -1;
557         ir_node         *best_succ_block = NULL;
558         int              best_pos        = -1;
559         int              n_preds;
560         const ir_edge_t *edge;
561
562         (void) env;
563         DB((dbg, LEVEL_1, "Processing %+F\n", block));
564
565         /* construct worklist */
566         foreach_block_succ(block, edge) {
567                 ir_node *succ_block = get_edge_src_irn(edge);
568                 double   execfreq   = get_block_execfreq(exec_freq, succ_block);
569
570                 if (execfreq > best_execfreq) {
571                         block_info_t *succ_info     = get_block_info(succ_block);
572                         worklist_t   *succ_worklist = succ_info->start_worklist;
573                         if (succ_worklist != NULL) {
574                                 best_execfreq   = execfreq;
575                                 worklist        = succ_worklist;
576                                 best_succ_block = succ_block;
577                                 best_pos        = get_edge_src_pos(edge);
578                         }
579                 }
580         }
581         if (worklist == NULL) {
582                 /* at least one successor block must have been processed unless this is
583                  * the end block */
584                 assert(tentative_mode || block == get_irg_end_block(get_irn_irg(block)));
585
586                 worklist = new_worklist();
587         } else {
588                 worklist = duplicate_activate_worklist(worklist, block, best_succ_block,
589                                                        best_pos);
590         }
591
592         block_info = get_block_info(block);
593
594 #ifdef DEBUG_libfirm
595         DB((dbg, LEVEL_1, "worklist at end of %+F:", block));
596         print_worklist(worklist, LEVEL_1);
597         DB((dbg, LEVEL_1, "\n"));
598
599         if (should_have_reached_fixpoint) {
600                 assert(worklists_equal(block_info->end_worklist, worklist));
601         }
602 #endif
603         block_info->end_worklist = duplicate_worklist(worklist);
604
605         do_spilling(block, worklist);
606         deactivate_worklist(worklist);
607
608 #ifdef DEBUG_libfirm
609         if (should_have_reached_fixpoint) {
610                 assert(worklists_equal(block_info->start_worklist, worklist));
611         }
612 #endif
613         block_info->start_worklist = worklist;
614
615         /* we shouldn't have any live values left at the start block */
616         n_preds = get_Block_n_cfgpreds(block);
617         assert(n_preds != 0 || worklist->n_live_values == 0);
618 }
619
620 typedef struct block_or_loop_t block_or_loop_t;
621 struct block_or_loop_t {
622         union {
623                 ir_node *block;
624                 ir_loop *loop;
625         } v;
626         bool is_loop;
627 };
628
629 static block_or_loop_t *loop_blocks;
630 static ir_loop         *current_loop;
631
632 static void find_blocks(ir_node *block);
633
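/**
 * Handles a block that belongs to a nested loop: the whole loop is appended
 * to the loop_blocks list as a single entry and the search continues at the
 * loop's exit edges.
 */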
634 static void find_in_loop(ir_loop *loop, ir_node *entry)
635 {
636         loop_info_t     *loop_info = get_loop_info(loop);
637         block_or_loop_t block_or_loop;
638         loop_edge_t     *edge;
639
640         /* simply mark one block of the loop to indicate that the loop was
641          * already processed */
642         ir_node *some_block = loop_info->entry_edges->block;
643         if (Block_block_visited(some_block))
644                 return;
        mark_Block_block_visited(some_block);
645
646         block_or_loop.v.loop  = loop;
647         block_or_loop.is_loop = true;
648         ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);
649
650 #ifndef NDEBUG
651         {
652                 /* the block we entered through must be one of the loop's entry blocks */
653                 loop_edge_t *edge  = loop_info->entry_edges;
654                 bool         found = false;
655                 for ( ; edge != NULL; edge = edge->next) {
656                         if (edge->block == entry)
657                                 found = true;
658                 }
659                 assert(found);
660         }
661 #endif
662         /* check all loop successors */
663         for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
664                 ir_node *succ      = edge->block;
665                 ir_loop *succ_loop = get_irn_loop(succ);
666
667                 if (succ_loop == current_loop) {
668                         find_blocks(succ);
669                 } else {
670                         assert(get_loop_depth(succ_loop) < get_loop_depth(current_loop));
671                 }
672         }
673 }
674
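/**
 * Collects all blocks of the current loop into the loop_blocks list by
 * following control-flow successors; nested loops are added as single
 * entries via find_in_loop().
 */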
675 static void find_blocks(ir_node *block)
676 {
677         const ir_edge_t *edge;
678         block_or_loop_t block_or_loop;
679
680         if (Block_block_visited(block))
681                 return;
682
683         block_or_loop.v.block = block;
684         block_or_loop.is_loop = false;
685
686         ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);
687         mark_Block_block_visited(block);
688
689         foreach_block_succ(block, edge) {
690                 ir_node *succ = get_edge_src_irn(edge);
691
692                 /* is it part of the current loop? */
693                 ir_loop *loop = get_irn_loop(succ);
694                 if (loop != current_loop) {
695                         /* a sub-loop? */
696                         if (get_loop_depth(loop) > get_loop_depth(current_loop)) {
697                                 find_in_loop(loop, succ);
698                         } else {
699                                 /* parent loop: we're not interested in the block */
700                         }
701                 } else {
702                         find_blocks(succ);
703                 }
704         }
705 }
706
707 /**
708  * append an entry to a worklist. WARNING: The entry must not already be in the
709  * worklist.
710  */
711 static void worklist_append(worklist_t *worklist, ir_node *value,
712                             ir_node *reload_point,
713                             ir_loop *unused_livethrough_loop)
714 {
715         worklist_entry_t *entry     = obstack_alloc(&obst, sizeof(entry[0]));
716         memset(entry, 0, sizeof(entry[0]));
717
718         entry->value                   = value;
719         entry->reload_point            = reload_point;
720         entry->unused_livethrough_loop = unused_livethrough_loop;
721         list_add_tail(&entry->head, &worklist->live_values);
722         ++worklist->n_live_values;
723         assert(worklist->n_live_values <= n_regs);
724 }
725
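/**
 * Adds an unused live-through value to the end worklists of all loop exit
 * predecessors and to the start worklists of all loop entry blocks, so the
 * value stays in a register across the loop.
 */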
726 static void push_unused_livethrough(loop_info_t *loop_info, ir_node *value)
727 {
728         /* add the value to all loop exit and entry blocks */
729         loop_edge_t *edge = loop_info->exit_edges;
730         for ( ; edge != NULL; edge = edge->next) {
731                 ir_node            *block
732                         = get_Block_cfgpred_block(edge->block, edge->pos);
733                 const block_info_t *info     = get_block_info(block);
734                 worklist_t         *worklist = info->end_worklist;
735                 /* TODO: we need a smarter mechanism here, that makes the reloader place
736                  * reload nodes on all loop exits... */
737                 ir_node *reload_point = NULL;
738
739                 worklist_append(worklist, value, reload_point, loop_info->loop);
740         }
741         edge = loop_info->entry_edges;
742         for ( ; edge != NULL; edge = edge->next) {
743                 ir_node            *entry_block = edge->block;
744                 const block_info_t *info        = get_block_info(entry_block);
745                 worklist_t         *worklist    = info->start_worklist;
746
747                 ir_node *pred_block
748                         = get_Block_cfgpred_block(entry_block, edge->pos);
749                 ir_node *reload_point = be_get_end_of_block_insertion_point(pred_block);
750
751                 worklist_append(worklist, value, reload_point, loop_info->loop);
752         }
753
754         set_irn_link(value, NULL);
755         ++loop_info->max_register_pressure;
756 }
757
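/**
 * Finds values that are needed after the loop but do not appear in the
 * worklists inside it (unused live-throughs) and, as long as the loop's
 * maximum register pressure permits, keeps them in registers across the loop
 * via push_unused_livethrough().
 */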
758 static void push_unused_livethroughs(loop_info_t *loop_info)
759 {
760         loop_edge_t *edge;
761
762         /* we can only push unused livethroughs if register pressure inside the loop
763          * was low enough */
764         if (loop_info->max_register_pressure >= n_regs)
765                 return;
766
767         /* find unused livethroughs: register pressure in the loop was low enough,
768          * which means that we had no spills, which in turn implies that at every
769          * point in the loop all live values fit into the available registers */
770         for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
771                 ir_node            *block          = edge->block;
772                 const block_info_t *info           = get_block_info(block);
773                 worklist_t         *start_worklist = info->start_worklist;
774                 ir_node            *exit_block;
775                 const block_info_t *exit_info;
776                 worklist_t         *end_worklist;
777                 struct list_head   *entry;
778
779                 if (start_worklist == NULL)
780                         continue;
781
782                 exit_block   = get_Block_cfgpred_block(edge->block, edge->pos);
783                 exit_info    = get_block_info(exit_block);
784                 end_worklist = exit_info->end_worklist;
785
786                 activate_worklist(end_worklist);
787                 /* all values contained in the start_worklist that are not available
788                  * in the end_worklist must be unused livethroughs */
789
790                 list_for_each(entry, &start_worklist->live_values) {
791                         worklist_entry_t *wl_entry
792                                 = list_entry(entry, worklist_entry_t, head);
793                         ir_node *value = wl_entry->value;
794
795                         if (!worklist_contains(value)) {
796                                 /* add it to all loop exits */
797                                 DB((dbg, LEVEL_2, "Found unused livethrough: %+F (regpressure in loop %u)\n", value, (unsigned) loop_info->max_register_pressure));
798                                 push_unused_livethrough(loop_info, value);
799                                 mark_irn_visited(value);
800                         }
801
802                         if (loop_info->max_register_pressure >= n_regs)
803                                 break;
804                 }
805                 deactivate_worklist(end_worklist);
806         }
807 }
808
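/**
 * Processes one element of the loop_blocks list: blocks are handled by
 * process_block(), already processed nested loops only contribute their
 * maximum register pressure and their unused live-throughs.
 */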
809 static void process_block_or_loop(const block_or_loop_t *block_or_loop)
810 {
811         if (block_or_loop->is_loop) {
812                 loop_info_t *loop_info = get_loop_info(block_or_loop->v.loop);
813
814                 if (loop_info->max_register_pressure > max_register_pressure)
815                         max_register_pressure = loop_info->max_register_pressure;
816
817                 push_unused_livethroughs(loop_info);
818                 return;
819         }
820         process_block(block_or_loop->v.block, NULL);
821 }
822
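/**
 * Spills a single loop: all nested loops are processed first, then a
 * post-order list of the loop's blocks is built and traversed several times
 * in tentative mode (presumably to let the block worklists stabilize) before
 * the final pass inserts the actual spills and reloads.
 */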
823 static void process_loop(ir_loop *loop)
824 {
825         int         n_elements = get_loop_n_elements(loop);
826         int         i, len;
827         loop_info_t *loop_info;
828         loop_edge_t *edge;
829         ir_node     *some_block;
830
831         /* first handle all sub-loops */
832         for (i = 0; i < n_elements; ++i) {
833                 loop_element element = get_loop_element(loop, i);
834                 if (*element.kind != k_ir_loop)
835                         continue;
836
837                 process_loop(element.son);
838         }
839
840         /* create a postorder of the blocks */
841         loop_info = get_loop_info(loop);
842         edge      = loop_info->entry_edges;
843         if (edge != NULL) {
844                 some_block = edge->block;
845         } else {
846                 assert(loop == get_irg_loop(current_ir_graph));
847                 some_block = get_irg_start_block(current_ir_graph);
848         }
849
850         loop_blocks  = NEW_ARR_F(block_or_loop_t,0);
851         current_loop = loop;
852
853         ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);
854         inc_irg_block_visited(current_ir_graph);
855         find_blocks(some_block);
856         /* for endless loops the end-block might be unreachable */
857         if (loop == get_irg_loop(current_ir_graph)) {
858                 find_blocks(get_irg_end_block(current_ir_graph));
859         }
860         ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);
861
862         DB((dbg, LEVEL_2, "Block List for loop %p\n", loop));
863         len = ARR_LEN(loop_blocks);
864         for (i = 0; i < len; ++i) {
865                 block_or_loop_t *block_or_loop = &loop_blocks[i];
866                 if (block_or_loop->is_loop) {
867                         DB((dbg, LEVEL_2, " L-%p", block_or_loop->v.loop));
868                 } else {
869                         DB((dbg, LEVEL_2, " B-%+F", block_or_loop->v.block));
870                 }
871         }
872         DB((dbg, LEVEL_2, "\n"));
873
874         max_register_pressure = 0;
875
876         /* run1: tentative phase */
877         tentative_mode = true;
878         for (i = len-1; i >= 0; --i) {
879                 process_block_or_loop(&loop_blocks[i]);
880         }
881
882         /* run2: tentative phase - should reach fixpoint */
883         tentative_mode = true;
884         for (i = len-1; i >= 0; --i) {
885                 process_block_or_loop(&loop_blocks[i]);
886         }
887
888 #ifndef NDEBUG
889         /* run3: tentative phase - check fixpoint */
890         tentative_mode               = true;
891         should_have_reached_fixpoint = true;
892         for (i = len-1; i >= 0; --i) {
893                 process_block_or_loop(&loop_blocks[i]);
894         }
895         should_have_reached_fixpoint = false;
896 #endif
897
898         /* run4: add spills/reloads */
899         tentative_mode = false;
900         for (i = len-1; i >= 0; --i) {
901                 process_block_or_loop(&loop_blocks[i]);
902         }
903
904         loop_info->max_register_pressure = max_register_pressure;
905         DB((dbg, LEVEL_1, "Regpressure in loop %p: %u\n", loop,
906             (unsigned) max_register_pressure));
907
908         DEL_ARR_F(loop_blocks);
909 }
910
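/**
 * Block walker: for every control-flow predecessor, requests reloads on the
 * incoming edge for all values that are live at the start of the block but
 * not available at the end of that predecessor; unused live-through values
 * only get reloads on loop entry edges.
 */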
911 static void fix_block_borders(ir_node *block, void *data)
912 {
913         block_info_t *block_info     = get_block_info(block);
914         worklist_t   *start_worklist = block_info->start_worklist;
915         int           n_cfgpreds     = get_Block_n_cfgpreds(block);
916         ir_loop      *loop           = get_irn_loop(block);
917         int           i;
918
919         (void) data;
920         assert(start_worklist != NULL);
921
922         for (i = 0; i < n_cfgpreds; ++i) {
923                 ir_node      *pred_block      = get_Block_cfgpred_block(block, i);
924                 block_info_t *pred_block_info = get_block_info(pred_block);
925                 worklist_t   *end_worklist    = pred_block_info->end_worklist;
926                 ir_loop      *pred_loop       = get_irn_loop(pred_block);
927                 bool          is_loop_entry   = false;
928                 struct list_head *entry;
929
930                 assert(end_worklist != NULL);
931
932                 if (get_loop_depth(pred_loop) < get_loop_depth(loop)) {
933                         is_loop_entry = true;
934                 }
935
936                 /* reload missing values */
937                 activate_worklist(end_worklist);
938
939                 list_for_each(entry, &start_worklist->live_values) {
940                         worklist_entry_t *wl_entry
941                                 = list_entry(entry, worklist_entry_t, head);
942                         ir_node          *value = wl_entry->value;
943
944                         if (is_Phi(value) && get_nodes_block(value) == block) {
945                                 value = get_irn_n(value, i);
946
947                         /* we might have Unknown nodes as arguments for the phi */
948                                 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, value))
949                                         continue;
950                         }
951
952                         if (worklist_contains(value))
953                                 continue;
954                         if (wl_entry->unused_livethrough_loop != NULL && !is_loop_entry)
955                                 continue;
956
957                         be_add_reload_on_edge(senv, value, block, i, cls, 1);
958                 }
959
960                 deactivate_worklist(end_worklist);
961         }
962 }
963
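/**
 * Main entry point: runs the belady3 spiller for one register class of a
 * graph.
 */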
964 static void be_spill_belady3(be_irg_t *birg, const arch_register_class_t *ncls)
965 {
966         ir_graph *irg = be_get_birg_irg(birg);
967
968         cls    = ncls;
969         n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
970
971         /* shortcut for register classes with ignore regs only */
972         if (n_regs == 0)
973                 return;
974
975         arch_env  = be_get_birg_arch_env(birg);
976         exec_freq = be_get_birg_exec_freq(birg);
977
978         be_clear_links(irg);
979         ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
980                         | IR_RESOURCE_LOOP_LINK);
981         inc_irg_visited(irg);
982
983         obstack_init(&obst);
984         senv = be_new_spill_env(birg);
985
986         assure_cf_loop(irg);
987         clear_loop_info(get_irg_loop(irg));
988         irg_block_walk_graph(irg, construct_loop_edges, NULL, NULL);
989
990         process_loop(get_irg_loop(current_ir_graph));
991
992         irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);
993
994         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
995                         | IR_RESOURCE_LOOP_LINK);
996
997         be_insert_spills_reloads(senv);
998
999         obstack_free(&obst, NULL);
1000
1001         /* clean up */
1002         be_delete_spill_env(senv);
1003 }
1004
1005 void be_init_spillbelady3(void)
1006 {
1007         static be_spiller_t belady3_spiller = {
1008                 be_spill_belady3
1009         };
1010
1011         be_register_spiller("belady3", &belady3_spiller);
1012         FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady3");
1013 }
1014
1015 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady3);