0dc93b4035a3df2db156343e9c30766980af5b5f
[libfirm] / ir / be / bespillbelady3.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       MIN-algorithm with some twists (aka belady spiller v3)
23  * @author      Matthias Braun
24  * @date        15.01.2008
25  * @version     $Id$
26  *
27  * TODO:
28  *   - handle phis correctly, decide whether we should spill them ~ok
29  *   - merge multiple start worksets of blocks ~ok
30  *   - how to and when to do the tentative phase...
31  */
32 #include "config.h"
33
34 #include <stdbool.h>
35
36 #include "debug.h"
37 #include "list.h"
38 #include "pdeq.h"
39
40 #include "irnode_t.h"
41 #include "irprintf.h"
42 #include "iredges_t.h"
43 #include "irloop_t.h"
44 #include "irgwalk.h"
45 #include "execfreq.h"
46
47 #include "bemodule.h"
48 #include "bespill.h"
49 #include "beutil.h"
50 #include "bespilloptions.h"
51 #include "besched_t.h"
52 #include "be_t.h"
53
54 #ifndef NDEBUG
55 #define EXPENSIVE_CHECKS
56 #endif
57
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

typedef struct loop_edge_t loop_edge_t;
/** A control-flow edge that crosses a loop boundary (entry or exit). */
struct loop_edge_t {
	ir_node     *block; /**< block at the target end of the edge */
	int          pos;   /**< predecessor position of the edge in block */
	loop_edge_t *next;  /**< next edge in the singly linked list */
};

typedef struct loop_info_t loop_info_t;
/** Per-loop data, attached to the loop through its link field. */
struct loop_info_t {
	ir_loop     *loop;        /**< the loop this info belongs to */
	loop_edge_t *entry_edges; /**< list of edges entering the loop */
	loop_edge_t *exit_edges;  /**< list of edges leaving the loop */
	size_t       max_register_pressure; /**< highest pressure observed
	                                         inside the loop */
};

typedef struct worklist_entry_t worklist_entry_t;
/** One live value tracked in a worklist. */
struct worklist_entry_t {
	struct list_head  head;         /**< list handle within the worklist */
	ir_node          *value;        /**< the live value itself */
	ir_node          *reload_point; /**< node before which a reload would
	                                     be inserted if the value is evicted */
	ir_loop          *unused_livethrough_loop; /**< loop this value lives
	                                     through unused, or NULL */
};

typedef struct worklist_t worklist_t;
/** An ordered set of live values (front = evicted first). */
struct worklist_t {
	struct list_head  live_values;   /**< list of worklist_entry_t */
	size_t            n_live_values; /**< number of entries in live_values */
	ir_visited_t      visited;       /**< timestamp to avoid merging a
	                                      worklist twice per round */
};

typedef struct block_info_t block_info_t;
/** Worklists at the borders of a block, attached via the block's link. */
struct block_info_t {
	worklist_t *start_worklist; /**< live values at the block start */
	worklist_t *end_worklist;   /**< live values at the block end */
};

/* file-global state of one spiller run */
static const arch_register_class_t *cls;      /* register class being spilled */
static struct obstack               obst;     /* allocator for all local data */
static spill_env_t                 *senv;     /* spill environment (reload/spill API) */
static size_t                       n_regs;   /* number of available registers */
static size_t                       max_register_pressure;
static bool                         tentative_mode; /* true: only measure, don't emit */
static bool                         should_have_reached_fixpoint;
static bool                         do_push_unused_livethroughs;
static ir_exec_freq                *exec_freq;
static ir_visited_t                 worklist_visited; /* timestamp counter */
106
107 static worklist_t *new_worklist(void)
108 {
109         worklist_t *worklist = obstack_alloc(&obst, sizeof(worklist[0]));
110         memset(worklist, 0, sizeof(worklist[0]));
111
112         INIT_LIST_HEAD(&worklist->live_values);
113         worklist->n_live_values    = 0;
114
115         return worklist;
116 }
117
/**
 * Clears the visited flag of @p node by setting its visited counter one
 * below the graph's current counter.
 */
static void mark_irn_not_visited(ir_node *node)
{
	set_irn_visited(node, get_irg_visited(current_ir_graph) - 1);
}
122
/**
 * Returns true if @p node is part of the currently activated worklist.
 * Membership is tracked through the node's visited flag, see
 * activate_worklist()/deactivate_worklist().
 */
static bool worklist_contains(const ir_node *node)
{
	return irn_visited(node);
}
127
128 static block_info_t *get_block_info(ir_node *block)
129 {
130         block_info_t *info = get_irn_link(block);
131         if (info != NULL)
132                 return info;
133
134         info = obstack_alloc(&obst, sizeof(info[0]));
135         memset(info, 0, sizeof(info[0]));
136         set_irn_link(block, info);
137         return info;
138 }
139
140 static void deactivate_worklist(const worklist_t *worklist)
141 {
142         struct list_head *entry;
143
144         list_for_each(entry, &worklist->live_values) {
145                 worklist_entry_t *wl_entry
146                         = list_entry(entry, worklist_entry_t, head);
147                 assert(worklist_contains(wl_entry->value));
148                 mark_irn_not_visited(wl_entry->value);
149                 set_irn_link(wl_entry->value, NULL);
150         }
151 }
152
153 static void activate_worklist(const worklist_t *worklist)
154 {
155         struct list_head *entry;
156
157         list_for_each(entry, &worklist->live_values) {
158                 worklist_entry_t *wl_entry
159                         = list_entry(entry, worklist_entry_t, head);
160                 assert(!worklist_contains(wl_entry->value));
161                 mark_irn_visited(wl_entry->value);
162                 set_irn_link(wl_entry->value, wl_entry);
163         }
164 }
165
166 /**
167  * duplicate a worklist and directly activates it
168  */
169 static void fill_and_activate_worklist(worklist_t *new_worklist,
170                 const worklist_t *worklist, ir_node *block, ir_node *succ_block,
171                 int succ_pos)
172 {
173         ir_node          *reload_point  = NULL;
174         size_t            n_live_values = 0;
175         struct list_head *entry;
176
177         if (succ_block != NULL &&
178                         (get_Block_n_cfgpreds(succ_block) > 1
179                          || get_irn_n_edges_kind(block, EDGE_KIND_BLOCK) > 1)) {
180                 reload_point = be_get_end_of_block_insertion_point(block);
181         }
182
183         list_for_each(entry, &worklist->live_values) {
184                 worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
185                 ir_node          *value     = wl_entry->value;
186                 worklist_entry_t *new_entry;
187
188                 if (new_worklist->n_live_values >= n_regs)
189                         break;
190
191                 if (is_Phi(value) && get_nodes_block(value) == succ_block) {
192                         value = get_Phi_pred(value, succ_pos);
193
194                         /* can happen for unknown phi preds */
195                         if (!arch_irn_consider_in_reg_alloc(cls, value))
196                                 continue;
197                 }
198
199                 if (irn_visited_else_mark(value))
200                         continue;
201
202                 new_entry = obstack_alloc(&obst, sizeof(new_entry[0]));
203                 memset(new_entry, 0, sizeof(new_entry[0]));
204
205                 new_entry->value = value;
206                 if (reload_point != NULL) {
207                         new_entry->reload_point = reload_point;
208                 } else {
209                         new_entry->reload_point = wl_entry->reload_point;
210                 }
211
212                 list_add_tail(&new_entry->head, &new_worklist->live_values);
213                 ++n_live_values;
214
215                 set_irn_link(value, new_entry);
216                 new_worklist->n_live_values++;
217         }
218 }
219
220 static worklist_t *duplicate_worklist(const worklist_t *worklist)
221 {
222         worklist_t       *new_worklist;
223         struct list_head *entry;
224
225         new_worklist = obstack_alloc(&obst, sizeof(new_worklist[0]));
226         memset(new_worklist, 0, sizeof(new_worklist[0]));
227         INIT_LIST_HEAD(&new_worklist->live_values);
228         new_worklist->n_live_values = worklist->n_live_values;
229
230         list_for_each(entry, &worklist->live_values) {
231                 worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
232                 worklist_entry_t *new_entry
233                         = obstack_alloc(&obst, sizeof(new_entry[0]));
234
235                 memcpy(new_entry, wl_entry, sizeof(new_entry[0]));
236                 list_add_tail(&new_entry->head, &new_worklist->live_values);
237         }
238
239         return new_worklist;
240 }
241
#ifdef DEBUG_libfirm
/**
 * Dumps the number and names of the values in @p worklist to the debug
 * output at verbosity @p level.
 */
static void print_worklist(const worklist_t *worklist, int level)
{
	struct list_head *entry;

	/* n_live_values is a size_t but the format expects int: cast it, as
	 * passing a size_t through varargs for %d is a type mismatch */
	DB((dbg, level, "%d values: ", (int) worklist->n_live_values));
	list_for_each(entry, &worklist->live_values) {
		worklist_entry_t *wl_entry
			= list_entry(entry, worklist_entry_t, head);

		DB((dbg, level, "%+F ", wl_entry->value));
	}
}
#endif
256
257
258
259 static void clear_loop_info(ir_loop *loop)
260 {
261         int n_elements = get_loop_n_elements(loop);
262         int i;
263
264         loop->link = NULL;
265         for (i = 0; i < n_elements; ++i) {
266                 loop_element element = get_loop_element(loop, i);
267                 if (*element.kind != k_ir_loop)
268                         continue;
269
270                 clear_loop_info(element.son);
271         }
272 }
273
274 static loop_info_t *get_loop_info(ir_loop *loop)
275 {
276         loop_info_t *info = loop->link;
277         if (info != NULL)
278                 return info;
279
280         info = obstack_alloc(&obst, sizeof(info[0]));
281         memset(info, 0, sizeof(info[0]));
282         info->loop = loop;
283         loop->link = info;
284         return info;
285 }
286
287 /**
288  * constructs loop in+out edges when called in a block walker
289  */
290 static void construct_loop_edges(ir_node *block, void *data)
291 {
292         int      n_cfgpreds = get_Block_n_cfgpreds(block);
293         ir_loop *loop       = get_irn_loop(block);
294         int      i;
295
296         (void) data;
297
298         for (i = 0; i < n_cfgpreds; ++i) {
299                 ir_node     *cfgpred_block = get_Block_cfgpred_block(block, i);
300                 ir_loop     *cfgpred_loop  = get_irn_loop(cfgpred_block);
301                 loop_edge_t *edge;
302                 bool        is_exit_edge;
303                 ir_loop     *l, *goal;
304
305                 if (cfgpred_loop == loop)
306                         continue;
307
308                 /* critical edges are splitted, so we can't jump from 1 loop
309                  * directly into another without going out of it */
310                 assert(get_loop_depth(cfgpred_loop) != get_loop_depth(loop));
311
312                 /* edge out of loop */
313                 if (get_loop_depth(cfgpred_loop) > get_loop_depth(loop)) {
314                         is_exit_edge = true;
315                         l            = cfgpred_loop;
316                         goal         = loop;
317                 } else {
318                         is_exit_edge = false;
319                         l            = loop;
320                         goal         = cfgpred_loop;
321                 }
322
323                 /* this might be a jump out/in multiple loops */
324                 do {
325                         loop_info_t *l_info = get_loop_info(l);
326
327                         edge = obstack_alloc(&obst, sizeof(edge[0]));
328                         memset(edge, 0, sizeof(edge[0]));
329                         edge->block = block;
330                         edge->pos   = i;
331
332                         if (is_exit_edge) {
333                                 edge->next         = l_info->exit_edges;
334                                 l_info->exit_edges = edge;
335                                 assert(get_loop_depth(loop) < get_loop_depth(l));
336                         } else {
337                                 edge->next          = l_info->entry_edges;
338                                 l_info->entry_edges = edge;
339                                 assert(get_loop_depth(cfgpred_loop) < get_loop_depth(l));
340                         }
341                         l = get_loop_outer_loop(l);
342                 } while(l != goal);
343         }
344 }
345
346
347
348
349 static void place_reload(worklist_entry_t *entry)
350 {
351         if (tentative_mode)
352                 return;
353
354         DB((dbg, LEVEL_1, "reload of %+F before %+F\n", entry->value,
355                 entry->reload_point));
356         assert(entry->reload_point != NULL);
357         be_add_reload(senv, entry->value, entry->reload_point, cls, 1);
358         entry->reload_point = NULL;
359 }
360
361 /**
362  * makes sure the worklist contains not more than n_regs - room_needed entries
363  */
364 static void make_room(worklist_t *worklist, size_t room_needed)
365 {
366         int               i;
367         int               spills_needed;
368         struct list_head *entry;
369
370         spills_needed = worklist->n_live_values + room_needed - n_regs;
371         if (spills_needed <= 0)
372                 return;
373
374         entry = worklist->live_values.next;
375         for(i = spills_needed; i > 0; --i) {
376                 struct list_head *next = entry->next;
377                 worklist_entry_t *wl_entry
378                         = list_entry(entry, worklist_entry_t, head);
379
380                 assert(worklist_contains(wl_entry->value));
381                 mark_irn_not_visited(wl_entry->value);
382                 place_reload(wl_entry);
383                 list_del(entry);
384
385                 entry = next;
386         }
387         worklist->n_live_values -= spills_needed;
388 }
389
390 /**
391  * a value was used, so bring it to the back of the worklist (which might
392  * result in a spill of another value).
393  */
394 static void val_used(worklist_t *worklist, ir_node *value, ir_node *sched_point)
395 {
396         /* already in the worklist? move around, otherwise add at back */
397         worklist_entry_t *entry = get_irn_link(value);
398
399         assert(arch_irn_consider_in_reg_alloc(cls, value));
400
401         if (worklist_contains(value)) {
402                 assert(entry != NULL);
403
404                 list_del(&entry->head);
405         } else {
406                 if (entry == NULL) {
407                         entry        = obstack_alloc(&obst, sizeof(entry[0]));
408                         memset(entry, 0, sizeof(entry[0]));
409
410                         entry->value = value;
411                         set_irn_link(value, entry);
412                 }
413
414                 ++worklist->n_live_values;
415                 mark_irn_visited(value);
416         }
417
418         entry->reload_point = sched_point;
419         list_add_tail(&entry->head, &worklist->live_values);
420 }
421
422 static void worklist_remove(worklist_t *worklist, ir_node *value)
423 {
424         worklist_entry_t *entry = get_irn_link(value);
425         assert(entry != NULL);
426         list_del(&entry->head);
427         --worklist->n_live_values;
428
429         assert(worklist_contains(value));
430         mark_irn_not_visited(value);
431 }
432
433 static void update_max_pressure(worklist_t *worklist)
434 {
435         if (worklist->n_live_values > max_register_pressure)
436                 max_register_pressure = worklist->n_live_values;
437 }
438
/**
 * Walks the schedule of @p block backwards making the belady decisions:
 * values defined by an instruction leave the worklist, values used by it
 * (re)enter it, and make_room() evicts values whenever the budget of
 * n_regs is exceeded.  Phis that are not live at the block start are
 * spilled (unless in tentative mode).
 */
static void do_spilling(ir_node *block, worklist_t *worklist)
{
	ir_node *node;

	assert(worklist != NULL);

	sched_foreach_reverse(block, node) {
		int    i, arity;
		size_t n_defs = 0;

		DB((dbg, LEVEL_2, "\t%+F... ", node));
		update_max_pressure(worklist);

		if (is_Phi(node)) {
			ir_node *node2;
			/* TODO: if we have some free registers, then we could decide to
			 * not spill some phis (but not for phis where at least 1 input is
			 * themselves) */

			/* we have to spill all phis that are not live */
			sched_foreach_reverse_from(node, node2) {
				assert(is_Phi(node2));

				if (worklist_contains(node2))
					continue;
				if (!arch_irn_consider_in_reg_alloc(cls, node2))
					continue;

				if (!tentative_mode)
					be_spill_phi(senv, node2);
			}
			DB((dbg, LEVEL_2, "\n"));
			/* phis sit at the block start, nothing scheduled before them */
			break;
		}

		/* remove values defined by this instruction from the workset. Values
		 * defined but not in the workset need free registers */
		if (get_irn_mode(node) == mode_T) {
			const ir_edge_t *edge;

			/* mode_T: the actual results are the Proj users */
			foreach_out_edge(node, edge) {
				ir_node *proj = get_edge_src_irn(edge);
				if (!arch_irn_consider_in_reg_alloc(cls, proj))
					continue;
				if (worklist_contains(proj)) {
					worklist_remove(worklist, proj);
				} else {
					++n_defs;
				}
			}
		} else if (arch_irn_consider_in_reg_alloc(cls, node)) {
			if (worklist_contains(node)) {
				worklist_remove(worklist, node);
			} else {
				n_defs = 1;
			}
		}

		/* make sure we have enough free registers for the spills */
		make_room(worklist, n_defs);

		/* put all values used by the instruction into the workset */
		arity = get_irn_arity(node);
		for(i = 0; i < arity; ++i) {
			ir_node *use = get_irn_n(node, i);

			if (!arch_irn_consider_in_reg_alloc(cls, use))
				continue;

			val_used(worklist, use, node);
		}

		/* we might have too many values in the worklist now and need to spill
		 * some */
		make_room(worklist, 0);

#ifdef DEBUG_libfirm
		print_worklist(worklist, LEVEL_2);
		DB((dbg, LEVEL_2, "\n"));
#endif
	}

	update_max_pressure(worklist);

#ifdef DEBUG_libfirm
	DB((dbg, LEVEL_1, "worklist at begin of %+F:", block));
	print_worklist(worklist, LEVEL_1);
	DB((dbg, LEVEL_1, "\n"));
#endif
}
529
530 static bool worklists_equal(const worklist_t *wl1, const worklist_t *wl2)
531 {
532         const struct list_head *i1 = &wl1->live_values;
533         const struct list_head *i2 = &wl2->live_values;
534
535         for ( ; i1 != &wl1->live_values && i2 != &wl2->live_values;
536                         i1 = i1->next, i2 = i2->next) {
537                 worklist_entry_t *entry1 = list_entry(i1, worklist_entry_t, head);
538                 worklist_entry_t *entry2 = list_entry(i2, worklist_entry_t, head);
539
540                 if (entry1->value != entry2->value)
541                         return false;
542         }
543         /* both lists at the end */
544         if (i1 != &wl1->live_values || i2 != &wl2->live_values)
545                 return false;
546
547         return true;
548 }
549
550 static bool fill_start_worklist(worklist_t *new_worklist, ir_node *block)
551 {
552         double           best_execfreq   = -1;
553         worklist_t      *best_worklist   = NULL;
554         ir_node         *best_succ_block = NULL;
555         int              best_pos;
556         const ir_edge_t *edge;
557
558         /* construct worklist */
559         foreach_block_succ(block, edge) {
560                 ir_node      *succ_block = get_edge_src_irn(edge);
561                 double       execfreq    = get_block_execfreq(exec_freq, succ_block);
562                 block_info_t *block_info;
563                 worklist_t   *succ_worklist;
564
565                 if (execfreq < best_execfreq)
566                         continue;
567
568                 block_info    = get_block_info(succ_block);
569                 succ_worklist = block_info->start_worklist;
570
571                 if (succ_worklist == NULL || succ_worklist->visited >= worklist_visited)
572                         continue;
573
574                 best_execfreq   = execfreq;
575                 best_worklist   = succ_worklist;
576                 best_succ_block = succ_block;
577                 best_pos        = get_edge_src_pos(edge);
578         }
579
580         if (best_worklist == NULL)
581                 return false;
582         best_worklist->visited = worklist_visited;
583
584         fill_and_activate_worklist(new_worklist, best_worklist, block,
585                         best_succ_block, best_pos);
586         return true;
587 }
588
589 static worklist_t *construct_start_worklist(ir_node *block)
590 {
591         worklist_t *worklist = new_worklist();
592
593         ++worklist_visited;
594
595         while(fill_start_worklist(worklist, block)) {
596                 if (worklist->n_live_values >= n_regs)
597                         break;
598         }
599
600         return worklist;
601 }
602
603 static void process_block(ir_node *block, void *env)
604 {
605         block_info_t    *block_info;
606         worklist_t      *worklist;
607         int              n_preds;
608
609         (void) env;
610         DB((dbg, LEVEL_1, "Processing %+F\n", block));
611
612         worklist   = construct_start_worklist(block);
613         block_info = get_block_info(block);
614
615 #ifdef DEBUG_libfirm
616         DB((dbg, LEVEL_1, "worklist at end of %+F:", block));
617         print_worklist(worklist, LEVEL_1);
618         DB((dbg, LEVEL_1, "\n"));
619
620         if (should_have_reached_fixpoint) {
621                 assert(worklists_equal(block_info->end_worklist, worklist));
622         }
623 #endif
624         block_info->end_worklist = duplicate_worklist(worklist);
625
626         do_spilling(block, worklist);
627         deactivate_worklist(worklist);
628
629 #ifdef DEBUG_libfirm
630         if (should_have_reached_fixpoint) {
631                 assert(worklists_equal(block_info->start_worklist, worklist));
632         }
633 #endif
634         block_info->start_worklist = worklist;
635
636         /* we shouldn't have any live values left at the start block */
637         n_preds = get_Block_n_cfgpreds(block);
638         //assert(n_preds != 0 || worklist->n_live_values == 0);
639 }
640
typedef struct block_or_loop_t block_or_loop_t;
/** An element of the post-order walk list: either a single block or a
 *  whole (previously processed) loop. */
struct block_or_loop_t {
	union {
		ir_node *block; /**< valid iff !is_loop */
		ir_loop *loop;  /**< valid iff is_loop */
	} v;
	bool is_loop; /**< discriminator for the union above */
};

/** flexible array of blocks/loops collected by find_blocks() */
static block_or_loop_t *loop_blocks;
/** the loop whose blocks are currently being collected */
static ir_loop         *current_loop;

/* forward declaration: find_blocks and find_in_loop are mutually recursive */
static void find_blocks(ir_node *block);
654
/**
 * Handles a successor edge that enters a nested loop during the
 * find_blocks() walk: records the whole loop as one element of the walk
 * list and continues the walk behind the loop's exit edges.
 *
 * @param loop   the nested loop being entered
 * @param entry  the loop block the edge jumps to (debug check only)
 */
static void find_in_loop(ir_loop *loop, ir_node *entry)
{
	loop_info_t     *loop_info = get_loop_info(loop);
	block_or_loop_t block_or_loop;
	loop_edge_t     *edge;

	/* simply mark 1 block in the loop to indicate that the loop was already
	 * processed */
	ir_node *some_block = loop_info->entry_edges->block;
	if (Block_block_visited(some_block))
		return;

	block_or_loop.v.loop  = loop;
	block_or_loop.is_loop = true;
	ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);

#ifndef NDEBUG
	{
		/* we should be 1 of the entry blocks */
		loop_edge_t *edge  = loop_info->entry_edges;
		bool         found = false;
		for ( ; edge != NULL; edge = edge->next) {
			if (edge->block == entry)
				found = true;
		}
		assert(found);
	}
#else
	(void) entry;
#endif

	/* check all loop successors: continue the walk where the loop exits
	 * back into the current loop */
	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
		ir_node *succ      = edge->block;
		ir_loop *succ_loop = get_irn_loop(succ);

		if (succ_loop == current_loop) {
			find_blocks(succ);
		} else {
			/* exits into an even deeper loop would mean an edge skipping
			 * a level, which critical-edge splitting rules out */
			assert(get_loop_depth(succ_loop) < get_loop_depth(current_loop));
		}
	}
}
698
699 static void find_blocks(ir_node *block)
700 {
701         const ir_edge_t *edge;
702         block_or_loop_t block_or_loop;
703
704         if (Block_block_visited(block))
705                 return;
706
707         block_or_loop.v.block = block;
708         block_or_loop.is_loop = false;
709
710         ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);
711         mark_Block_block_visited(block);
712
713         foreach_block_succ(block, edge) {
714                 ir_node *succ = get_edge_src_irn(edge);
715
716                 /* is it part of the current loop? */
717                 ir_loop *loop = get_irn_loop(succ);
718                 if (loop != current_loop) {
719                         /* a sub-loop? */
720                         if (get_loop_depth(loop) > get_loop_depth(current_loop)) {
721                                 find_in_loop(loop, succ);
722                         } else {
723                                 /* parent loop: we're not interested in the block */
724                         }
725                 } else {
726                         find_blocks(succ);
727                 }
728         }
729 }
730
731 /**
732  * append an entry to a worklist. WARNING: The entry must not already be in the
733  * worklist.
734  */
735 static void worklist_append(worklist_t *worklist, ir_node *value,
736                             ir_node *reload_point,
737                                                         ir_loop *unused_livethrough_loop)
738 {
739         worklist_entry_t *entry     = obstack_alloc(&obst, sizeof(entry[0]));
740         memset(entry, 0, sizeof(entry[0]));
741
742 #ifdef EXPENSIVE_CHECKS
743         {
744                 struct list_head *entry;
745                 list_for_each(entry, &worklist->live_values) {
746                         worklist_entry_t *wl_entry
747                                 = list_entry(entry, worklist_entry_t, head);
748                         assert(wl_entry->value != value);
749                 }
750         }
751 #endif
752
753         entry->value                   = value;
754         entry->reload_point            = reload_point;
755         entry->unused_livethrough_loop = unused_livethrough_loop;
756         list_add_tail(&entry->head, &worklist->live_values);
757         ++worklist->n_live_values;
758         assert(worklist->n_live_values <= n_regs);
759 }
760
/**
 * Adds @p value (a livethrough that is unused inside the loop) to the end
 * worklists of all blocks before the loop's exit edges and to the start
 * worklists of all loop entry blocks, so the value stays in a register
 * across the loop.  Each worklist is touched at most once per call
 * (guarded by the worklist_visited timestamp).
 */
static void push_unused_livethrough(loop_info_t *loop_info, ir_node *value)
{
	loop_edge_t *edge;
	++worklist_visited;

	/* add the value to all loop exit and entry blocks */
	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
		ir_node            *block
			= get_Block_cfgpred_block(edge->block, edge->pos);
		const block_info_t *info     = get_block_info(block);
		worklist_t         *worklist = info->end_worklist;
		ir_node            *reload_point = NULL;

		if (worklist->visited >= worklist_visited)
			continue;
		worklist->visited = worklist_visited;

		/* TODO: we need a smarter mechanism here, that makes the reloader place
		 * reload nodes on all loop exits... */

		worklist_append(worklist, value, reload_point, loop_info->loop);
	}
	edge = loop_info->entry_edges;
	for ( ; edge != NULL; edge = edge->next) {
		ir_node            *entry_block = edge->block;
		const block_info_t *info        = get_block_info(entry_block);
		worklist_t         *worklist    = info->start_worklist;
		ir_node            *pred_block;
		ir_node            *reload_point;

		if (worklist->visited >= worklist_visited)
			continue;
		worklist->visited = worklist_visited;

		/* reload at the end of the block jumping into the loop */
		pred_block   = get_Block_cfgpred_block(entry_block, edge->pos);
		reload_point = be_get_end_of_block_insertion_point(pred_block);

		worklist_append(worklist, value, reload_point, loop_info->loop);
	}

	/* drop the link so a stale entry pointer is not reused later */
	set_irn_link(value, NULL);
	/* the value now occupies a register throughout the loop */
	++loop_info->max_register_pressure;
}
804
/**
 * Finds values that live through the loop without being used inside it and
 * keeps them in registers (push_unused_livethrough), as long as the loop's
 * register pressure leaves room for them.
 */
static void push_unused_livethroughs(loop_info_t *loop_info)
{
	loop_edge_t *edge;

	/* we can only push unused livethroughs if register pressure inside the loop
	 * was low enough */
	if (loop_info->max_register_pressure >= n_regs)
		return;

	/* find unused livethroughs: register pressure in the loop was low enough
	 * which means that we had no spills which implies that at every point in
	 * the loop all*/
	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
		ir_node            *block          = edge->block;
		const block_info_t *info           = get_block_info(block);
		worklist_t         *start_worklist = info->start_worklist;
		ir_node            *exit_block;
		const block_info_t *exit_info;
		worklist_t         *end_worklist;
		struct list_head   *entry;

		if (start_worklist == NULL)
			continue;

		/* the block inside the loop the exit edge comes from */
		exit_block   = get_Block_cfgpred_block(edge->block, edge->pos);
		exit_info    = get_block_info(exit_block);
		end_worklist = exit_info->end_worklist;

		activate_worklist(end_worklist);
		/* all values contained in the start_worklist, which are not available
		 * in the end_worklist, must be unused livethroughs */

		list_for_each(entry, &start_worklist->live_values) {
			worklist_entry_t *wl_entry
				= list_entry(entry, worklist_entry_t, head);
			ir_node *value = wl_entry->value;

			/* stop as soon as the register budget inside the loop is full */
			if (loop_info->max_register_pressure >= n_regs)
				break;


			if (!worklist_contains(value)) {
				/* add it to all loop exits */
				DB((dbg, LEVEL_2, "Found unused livethrough: %+F (regpressure in loop %d)\n", value, loop_info->max_register_pressure));
				push_unused_livethrough(loop_info, value);
				/* the value should now be part of end_worklist, so mark it */
				mark_irn_visited(value);
			}
		}

		deactivate_worklist(end_worklist);
	}
}
858
859 static void process_block_or_loop(const block_or_loop_t *block_or_loop)
860 {
861         if (block_or_loop->is_loop) {
862                 loop_info_t *loop_info = get_loop_info(block_or_loop->v.loop);
863
864                 if (do_push_unused_livethroughs)
865                         push_unused_livethroughs(loop_info);
866
867                 if (loop_info->max_register_pressure > max_register_pressure)
868                         max_register_pressure = loop_info->max_register_pressure;
869
870                 return;
871         }
872         process_block(block_or_loop->v.block, NULL);
873 }
874
875 static void process_loop(ir_loop *loop)
876 {
877         int         n_elements = get_loop_n_elements(loop);
878         int         i, len;
879         loop_info_t *loop_info;
880         loop_edge_t *edge;
881         ir_node     *some_block;
882
883         /* first handle all sub-loops */
884         for (i = 0; i < n_elements; ++i) {
885                 loop_element element = get_loop_element(loop, i);
886                 if (*element.kind != k_ir_loop)
887                         continue;
888
889                 process_loop(element.son);
890         }
891
892         /* create a postorder of the blocks */
893         loop_info = get_loop_info(loop);
894         edge      = loop_info->entry_edges;
895         if (edge != NULL) {
896                 some_block = edge->block;
897         } else {
898                 assert(loop == get_irg_loop(current_ir_graph));
899                 some_block = get_irg_start_block(current_ir_graph);
900         }
901
902         loop_blocks  = NEW_ARR_F(block_or_loop_t,0);
903         current_loop = loop;
904
905         ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);
906         inc_irg_block_visited(current_ir_graph);
907         find_blocks(some_block);
908         /* for endless loops the end-block might be unreachable */
909         if (loop == get_irg_loop(current_ir_graph)) {
910                 find_blocks(get_irg_end_block(current_ir_graph));
911         }
912         ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);
913
914         DB((dbg, LEVEL_3, "Block List for loop %p\n", loop));
915         len = ARR_LEN(loop_blocks);
916         for (i = 0; i < len; ++i) {
917                 block_or_loop_t *block_or_loop = &loop_blocks[i];
918                 if (block_or_loop->is_loop) {
919                         DB((dbg, LEVEL_3, " L-%p", block_or_loop->v.loop));
920                 } else {
921                         DB((dbg, LEVEL_3, " B-%+F", block_or_loop->v.block));
922                 }
923         }
924         DB((dbg, LEVEL_3, "\n"));
925
926         max_register_pressure = 0;
927
928         /* run1: tentative phase */
929         tentative_mode = true;
930         for (i = len-1; i >= 0; --i) {
931                 process_block_or_loop(&loop_blocks[i]);
932         }
933
934         /* run2: tentative phase - should reach fixpoint */
935         tentative_mode = true;
936         for (i = len-1; i >= 0; --i) {
937                 process_block_or_loop(&loop_blocks[i]);
938         }
939
940 #ifndef NDEBUG
941         /* run3: tentative phase - check fixpoint */
942         tentative_mode               = true;
943         should_have_reached_fixpoint = true;
944         for (i = len-1; i >= 0; --i) {
945                 process_block_or_loop(&loop_blocks[i]);
946         }
947         should_have_reached_fixpoint = false;
948 #endif
949
950         /* run4: add spills/reloads */
951         tentative_mode              = false;
952         do_push_unused_livethroughs = true;
953         for (i = len-1; i >= 0; --i) {
954                 process_block_or_loop(&loop_blocks[i]);
955         }
956         do_push_unused_livethroughs = false;
957
958         loop_info->max_register_pressure = max_register_pressure;
959         DB((dbg, LEVEL_2, "Regpressure in loop %p: %u\n", loop,
960                                 (unsigned) max_register_pressure));
961
962         DEL_ARR_F(loop_blocks);
963 }
964
965 static void fix_block_borders(ir_node *block, void *data)
966 {
967         block_info_t *block_info     = get_block_info(block);
968         worklist_t   *start_worklist = block_info->start_worklist;
969         int           n_cfgpreds     = get_Block_n_cfgpreds(block);
970         ir_loop      *loop           = get_irn_loop(block);
971         int           i;
972
973         (void) data;
974         assert(start_worklist != NULL);
975
976         for (i = 0; i < n_cfgpreds; ++i) {
977                 ir_node      *pred_block      = get_Block_cfgpred_block(block, i);
978                 block_info_t *pred_block_info = get_block_info(pred_block);
979                 worklist_t   *end_worklist    = pred_block_info->end_worklist;
980                 ir_loop      *pred_loop       = get_irn_loop(pred_block);
981                 bool          is_loop_entry   = false;
982                 struct list_head *entry;
983
984                 assert(end_worklist != NULL);
985
986                 if (get_loop_depth(pred_loop) < get_loop_depth(loop)) {
987                         is_loop_entry = true;
988                 }
989
990                 /* reload missing values */
991                 activate_worklist(end_worklist);
992
993                 list_for_each(entry, &start_worklist->live_values) {
994                         worklist_entry_t *wl_entry
995                                 = list_entry(entry, worklist_entry_t, head);
996                         ir_node          *value = wl_entry->value;
997
998                         if (is_Phi(value) && get_nodes_block(value) == block) {
999                                 value = get_irn_n(value, i);
1000
1001                                 /* we might have unknowns as argument for the phi */
1002                                 if (!arch_irn_consider_in_reg_alloc(cls, value))
1003                                         continue;
1004                         }
1005
1006                         if (worklist_contains(value))
1007                                 continue;
1008                         if (wl_entry->unused_livethrough_loop != NULL && !is_loop_entry)
1009                                 continue;
1010
1011                         be_add_reload_on_edge(senv, value, block, i, cls, 1);
1012                 }
1013
1014                 deactivate_worklist(end_worklist);
1015         }
1016 }
1017
1018 static void be_spill_belady3(be_irg_t *birg, const arch_register_class_t *ncls)
1019 {
1020         ir_graph *irg = be_get_birg_irg(birg);
1021
1022         cls    = ncls;
1023         n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
1024
1025         /* shortcut for register classes with ignore regs only */
1026         if (n_regs == 0)
1027                 return;
1028
1029         worklist_visited = 0;
1030         exec_freq        = be_get_birg_exec_freq(birg);
1031
1032         be_clear_links(irg);
1033         ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
1034                         | IR_RESOURCE_LOOP_LINK);
1035         inc_irg_visited(irg);
1036
1037         obstack_init(&obst);
1038         senv = be_new_spill_env(birg);
1039
1040         assure_cf_loop(irg);
1041         clear_loop_info(get_irg_loop(irg));
1042         irg_block_walk_graph(irg, construct_loop_edges, NULL, NULL);
1043
1044         process_loop(get_irg_loop(current_ir_graph));
1045
1046         irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);
1047
1048         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
1049                         | IR_RESOURCE_LOOP_LINK);
1050
1051         be_insert_spills_reloads(senv);
1052
1053         obstack_free(&obst, NULL);
1054
1055         /* clean up */
1056         be_delete_spill_env(senv);
1057 }
1058
1059 void be_init_spillbelady3(void)
1060 {
1061         static be_spiller_t belady3_spiller = {
1062                 be_spill_belady3
1063         };
1064
1065         be_register_spiller("belady3", &belady3_spiller);
1066         FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady3");
1067 }
1068
1069 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady3);