- all visited flags use the ir_visited_t type now
[libfirm] / ir / be / bespillbelady3.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       MIN-algorithm with some twists (aka belady spiller v3)
23  * @author      Matthias Braun
24  * @date        15.01.2008
25  * @version     $Id$
26  *
27  * TODO:
28  *   - handle phis correctly, decide whether we should spill them ~ok
29  *   - merge multiple start worksets of blocks ~ok
30  *   - how to and when to do the tentative phase...
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include <stdbool.h>
37
38 #include "debug.h"
39 #include "list.h"
40 #include "pdeq.h"
41
42 #include "irnode_t.h"
43 #include "irprintf.h"
44 #include "iredges_t.h"
45 #include "irloop_t.h"
46 #include "irgwalk.h"
47 #include "execfreq.h"
48
49 #include "bemodule.h"
50 #include "bespill.h"
51 #include "beutil.h"
52 #include "bespilloptions.h"
53 #include "besched_t.h"
54 #include "be_t.h"
55
56 #define EXPENSIVE_CHECKS
57
58 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
59
typedef struct loop_edge_t loop_edge_t;
/** A control-flow edge entering or leaving a loop. */
struct loop_edge_t {
	ir_node     *block; /**< target block of the control-flow edge */
	int          pos;   /**< predecessor position of the edge in block */
	loop_edge_t *next;  /**< next edge in the singly-linked list */
};

typedef struct loop_info_t loop_info_t;
/** Extra information attached to a loop (stored in loop->link). */
struct loop_info_t {
	ir_loop     *loop;                  /**< the loop this info belongs to */
	loop_edge_t *entry_edges;           /**< list of edges entering the loop */
	loop_edge_t *exit_edges;            /**< list of edges leaving the loop */
	size_t       max_register_pressure; /**< maximum pressure observed inside the loop */
};

typedef struct worklist_entry_t worklist_entry_t;
/** One live value tracked in a worklist, plus reload bookkeeping. */
struct worklist_entry_t {
	struct list_head  head;         /**< list handle for worklist_t::live_values */
	ir_node          *value;        /**< the live value */
	ir_node          *reload_point; /**< node before which a reload has to be placed */
	ir_loop          *unused_livethrough_loop; /**< loop for which the value is an
	                                     unused livethrough (or NULL) */
};

typedef struct worklist_t worklist_t;
/** An ordered set of live values. val_used() appends to the back, make_room()
 * evicts from the front, so the front holds the least recently used values. */
struct worklist_t {
	struct list_head  live_values;   /**< list of worklist_entry_t */
	size_t            n_live_values; /**< number of entries in live_values */
	ir_visited_t      visited;       /**< timestamp guarding against processing
	                                     the same worklist twice in one round */
};

typedef struct block_info_t block_info_t;
/** Worklists at the borders of a block (stored in the block's link field). */
struct block_info_t {
	worklist_t *start_worklist; /**< live values at the start of the block */
	worklist_t *end_worklist;   /**< live values at the end of the block */
};

/* environment of the currently running spiller run */
static const arch_env_t            *arch_env;
static const arch_register_class_t *cls;       /**< register class being spilled */
static struct obstack               obst;      /**< memory pool for all local data */
static spill_env_t                 *senv;      /**< spill environment collecting spills/reloads */
static size_t                       n_regs;    /**< number of available registers */
static size_t                       max_register_pressure;
static bool                         tentative_mode; /**< if set, no spills/reloads are emitted */
static bool                         should_have_reached_fixpoint;
static bool                         do_push_unused_livethroughs;
static ir_exec_freq                *exec_freq;
static ir_visited_t                 worklist_visited; /**< current worklist timestamp */
107
108 static worklist_t *new_worklist(void)
109 {
110         worklist_t *worklist = obstack_alloc(&obst, sizeof(worklist[0]));
111         memset(worklist, 0, sizeof(worklist[0]));
112
113         INIT_LIST_HEAD(&worklist->live_values);
114         worklist->n_live_values    = 0;
115
116         return worklist;
117 }
118
/**
 * Marks @p node as "not visited" for the current traversal by setting its
 * visited counter to one below the graph's current visited counter.
 */
static void mark_irn_not_visited(ir_node *node)
{
	set_irn_visited(node, get_irg_visited(current_ir_graph) - 1);
}
123
/**
 * Returns true if @p node is in the currently activated worklist.
 * Membership is encoded via the node's visited flag (see activate_worklist).
 */
static bool worklist_contains(const ir_node *node)
{
	return irn_visited(node);
}
128
129 static block_info_t *get_block_info(ir_node *block)
130 {
131         block_info_t *info = get_irn_link(block);
132         if (info != NULL)
133                 return info;
134
135         info = obstack_alloc(&obst, sizeof(info[0]));
136         memset(info, 0, sizeof(info[0]));
137         set_irn_link(block, info);
138         return info;
139 }
140
141 static void deactivate_worklist(const worklist_t *worklist)
142 {
143         struct list_head *entry;
144
145         list_for_each(entry, &worklist->live_values) {
146                 worklist_entry_t *wl_entry
147                         = list_entry(entry, worklist_entry_t, head);
148                 assert(worklist_contains(wl_entry->value));
149                 mark_irn_not_visited(wl_entry->value);
150                 set_irn_link(wl_entry->value, NULL);
151         }
152 }
153
154 static void activate_worklist(const worklist_t *worklist)
155 {
156         struct list_head *entry;
157
158         list_for_each(entry, &worklist->live_values) {
159                 worklist_entry_t *wl_entry
160                         = list_entry(entry, worklist_entry_t, head);
161                 assert(!worklist_contains(wl_entry->value));
162                 mark_irn_visited(wl_entry->value);
163                 set_irn_link(wl_entry->value, wl_entry);
164         }
165 }
166
167 /**
168  * duplicate a worklist and directly activates it
169  */
170 static void fill_and_activate_worklist(worklist_t *new_worklist,
171                 const worklist_t *worklist, ir_node *block, ir_node *succ_block,
172                 int succ_pos)
173 {
174         ir_node          *reload_point  = NULL;
175         size_t            n_live_values = 0;
176         struct list_head *entry;
177
178         if (succ_block != NULL &&
179                         (get_Block_n_cfgpreds(succ_block) > 1
180                          || get_irn_n_edges_kind(block, EDGE_KIND_BLOCK) > 1)) {
181                 reload_point = be_get_end_of_block_insertion_point(block);
182         }
183
184         list_for_each(entry, &worklist->live_values) {
185                 worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
186                 ir_node          *value     = wl_entry->value;
187                 worklist_entry_t *new_entry;
188
189                 if (new_worklist->n_live_values >= n_regs)
190                         break;
191
192                 if (is_Phi(value) && get_nodes_block(value) == succ_block) {
193                         value = get_Phi_pred(value, succ_pos);
194
195                         /* can happen for unknown phi preds */
196                         if (!arch_irn_consider_in_reg_alloc(arch_env, cls, value))
197                                 continue;
198                 }
199
200                 if (irn_visited(value))
201                         continue;
202
203                 new_entry = obstack_alloc(&obst, sizeof(new_entry[0]));
204                 memset(new_entry, 0, sizeof(new_entry[0]));
205
206                 new_entry->value = value;
207                 if (reload_point != NULL) {
208                         new_entry->reload_point = reload_point;
209                 } else {
210                         new_entry->reload_point = wl_entry->reload_point;
211                 }
212
213                 list_add_tail(&new_entry->head, &new_worklist->live_values);
214                 ++n_live_values;
215
216                 mark_irn_visited(value);
217                 set_irn_link(value, new_entry);
218                 new_worklist->n_live_values++;
219         }
220 }
221
222 static worklist_t *duplicate_worklist(const worklist_t *worklist)
223 {
224         worklist_t       *new_worklist;
225         struct list_head *entry;
226
227         new_worklist = obstack_alloc(&obst, sizeof(new_worklist[0]));
228         memset(new_worklist, 0, sizeof(new_worklist[0]));
229         INIT_LIST_HEAD(&new_worklist->live_values);
230         new_worklist->n_live_values = worklist->n_live_values;
231
232         list_for_each(entry, &worklist->live_values) {
233                 worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
234                 worklist_entry_t *new_entry
235                         = obstack_alloc(&obst, sizeof(new_entry[0]));
236
237                 memcpy(new_entry, wl_entry, sizeof(new_entry[0]));
238                 list_add_tail(&new_entry->head, &new_worklist->live_values);
239         }
240
241         return new_worklist;
242 }
243
244 #ifdef DEBUG_libfirm
245 static void print_worklist(const worklist_t *worklist, int level)
246 {
247         struct list_head *entry;
248
249         DB((dbg, level, "%d values: ", worklist->n_live_values));
250         list_for_each(entry, &worklist->live_values) {
251                 worklist_entry_t *wl_entry
252                         = list_entry(entry, worklist_entry_t, head);
253
254                 DB((dbg, level, "%+F ", wl_entry->value));
255         }
256 }
257 #endif
258
259
260
261 static void clear_loop_info(ir_loop *loop)
262 {
263         int n_elements = get_loop_n_elements(loop);
264         int i;
265
266         loop->link = NULL;
267         for (i = 0; i < n_elements; ++i) {
268                 loop_element element = get_loop_element(loop, i);
269                 if (*element.kind != k_ir_loop)
270                         continue;
271
272                 clear_loop_info(element.son);
273         }
274 }
275
276 static loop_info_t *get_loop_info(ir_loop *loop)
277 {
278         loop_info_t *info = loop->link;
279         if (info != NULL)
280                 return info;
281
282         info = obstack_alloc(&obst, sizeof(info[0]));
283         memset(info, 0, sizeof(info[0]));
284         info->loop = loop;
285         loop->link = info;
286         return info;
287 }
288
289 /**
290  * constructs loop in+out edges when called in a block walker
291  */
292 static void construct_loop_edges(ir_node *block, void *data)
293 {
294         int      n_cfgpreds = get_Block_n_cfgpreds(block);
295         ir_loop *loop       = get_irn_loop(block);
296         int      i;
297
298         (void) data;
299
300         for (i = 0; i < n_cfgpreds; ++i) {
301                 ir_node     *cfgpred_block = get_Block_cfgpred_block(block, i);
302                 ir_loop     *cfgpred_loop  = get_irn_loop(cfgpred_block);
303                 loop_edge_t *edge;
304                 bool        is_exit_edge;
305                 ir_loop     *l, *goal;
306
307                 if (cfgpred_loop == loop)
308                         continue;
309
310                 /* critical edges are splitted, so we can't jump from 1 loop
311                  * directly into another without going out of it */
312                 assert(get_loop_depth(cfgpred_loop) != get_loop_depth(loop));
313
314                 /* edge out of loop */
315                 if (get_loop_depth(cfgpred_loop) > get_loop_depth(loop)) {
316                         is_exit_edge = true;
317                         l            = cfgpred_loop;
318                         goal         = loop;
319                 } else {
320                         is_exit_edge = false;
321                         l            = loop;
322                         goal         = cfgpred_loop;
323                 }
324
325                 /* this might be a jump out/in multiple loops */
326                 do {
327                         loop_info_t *l_info = get_loop_info(l);
328
329                         edge = obstack_alloc(&obst, sizeof(edge[0]));
330                         memset(edge, 0, sizeof(edge[0]));
331                         edge->block = block;
332                         edge->pos   = i;
333
334                         if (is_exit_edge) {
335                                 edge->next         = l_info->exit_edges;
336                                 l_info->exit_edges = edge;
337                                 assert(get_loop_depth(loop) < get_loop_depth(l));
338                         } else {
339                                 edge->next          = l_info->entry_edges;
340                                 l_info->entry_edges = edge;
341                                 assert(get_loop_depth(cfgpred_loop) < get_loop_depth(l));
342                         }
343                         l = get_loop_outer_loop(l);
344                 } while(l != goal);
345         }
346 }
347
348
349
350
/**
 * Requests a reload of the entry's value before its recorded reload point
 * and clears the reload point. Does nothing in tentative mode (no code is
 * modified then).
 */
static void place_reload(worklist_entry_t *entry)
{
	if (tentative_mode)
		return;

	DB((dbg, LEVEL_1, "reload of %+F before %+F\n", entry->value,
		entry->reload_point));
	assert(entry->reload_point != NULL);
	be_add_reload(senv, entry->value, entry->reload_point, cls, 1);
	entry->reload_point = NULL;
}
362
363 /**
364  * makes sure the worklist contains not more than n_regs - room_needed entries
365  */
366 static void make_room(worklist_t *worklist, size_t room_needed)
367 {
368         int               i;
369         int               spills_needed;
370         struct list_head *entry;
371
372         spills_needed = worklist->n_live_values + room_needed - n_regs;
373         if (spills_needed <= 0)
374                 return;
375
376         entry = worklist->live_values.next;
377         for(i = spills_needed; i > 0; --i) {
378                 struct list_head *next = entry->next;
379                 worklist_entry_t *wl_entry
380                         = list_entry(entry, worklist_entry_t, head);
381
382                 assert(worklist_contains(wl_entry->value));
383                 mark_irn_not_visited(wl_entry->value);
384                 place_reload(wl_entry);
385                 list_del(entry);
386
387                 entry = next;
388         }
389         worklist->n_live_values -= spills_needed;
390 }
391
392 /**
393  * a value was used, so bring it to the back of the worklist (which might
394  * result in a spill of another value).
395  */
396 static void val_used(worklist_t *worklist, ir_node *value, ir_node *sched_point)
397 {
398         /* already in the worklist? move around, otherwise add at back */
399         worklist_entry_t *entry = get_irn_link(value);
400
401         assert(arch_irn_consider_in_reg_alloc(arch_env, cls, value));
402
403         if (worklist_contains(value)) {
404                 assert(entry != NULL);
405
406                 list_del(&entry->head);
407         } else {
408                 if (entry == NULL) {
409                         entry        = obstack_alloc(&obst, sizeof(entry[0]));
410                         memset(entry, 0, sizeof(entry[0]));
411
412                         entry->value = value;
413                         set_irn_link(value, entry);
414                 }
415
416                 ++worklist->n_live_values;
417                 mark_irn_visited(value);
418         }
419
420         entry->reload_point = sched_point;
421         list_add_tail(&entry->head, &worklist->live_values);
422 }
423
424 static void worklist_remove(worklist_t *worklist, ir_node *value)
425 {
426         worklist_entry_t *entry = get_irn_link(value);
427         assert(entry != NULL);
428         list_del(&entry->head);
429         --worklist->n_live_values;
430
431         assert(worklist_contains(value));
432         mark_irn_not_visited(value);
433 }
434
435 static void update_max_pressure(worklist_t *worklist)
436 {
437         if (worklist->n_live_values > max_register_pressure)
438                 max_register_pressure = worklist->n_live_values;
439 }
440
/**
 * Performs the spilling decisions for one block: walks the schedule from
 * back to front maintaining the set of live values in @p worklist, calling
 * make_room() (which places reloads) whenever the pressure would exceed
 * n_regs, and spilling all phis that are not live at the block start.
 */
static void do_spilling(ir_node *block, worklist_t *worklist)
{
	ir_node *node;

	assert(worklist != NULL);

	sched_foreach_reverse(block, node) {
		int    i, arity;
		size_t n_defs = 0;

		DB((dbg, LEVEL_2, "\t%+F... ", node));
		update_max_pressure(worklist);

		if (is_Phi(node)) {
			ir_node *node2;
			/* TODO: if we have some free registers, then we could decide to
			 * not spill some phis (but not for phis where at least 1 input is
			 * themselfes) */

			/* we have to spill all phis that are not live */
			sched_foreach_reverse_from(node, node2) {
				assert(is_Phi(node2));

				if (worklist_contains(node2))
					continue;
				if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node2))
					continue;

				if (!tentative_mode)
					be_spill_phi(senv, node2);
			}
			DB((dbg, LEVEL_2, "\n"));
			/* only phis remain before this point in the schedule, so the
			 * reverse walk can stop here */
			break;
		}

		/* remove values defined by this instruction from the workset. Values
		 * defined but not in the workset need free registers */
		if (get_irn_mode(node) == mode_T) {
			const ir_edge_t *edge;

			foreach_out_edge(node, edge) {
				ir_node *proj = get_edge_src_irn(edge);
				if (!arch_irn_consider_in_reg_alloc(arch_env, cls, proj))
					continue;
				if (worklist_contains(proj)) {
					worklist_remove(worklist, proj);
				} else {
					++n_defs;
				}
			}
		} else if (arch_irn_consider_in_reg_alloc(arch_env, cls, node)) {
			if (worklist_contains(node)) {
				worklist_remove(worklist, node);
			} else {
				n_defs = 1;
			}
		}

		/* make sure we have enough free registers for the spills */
		make_room(worklist, n_defs);

		/* put all values used by the instruction into the workset */
		arity = get_irn_arity(node);
		for(i = 0; i < arity; ++i) {
			ir_node *use = get_irn_n(node, i);

			if (!arch_irn_consider_in_reg_alloc(arch_env, cls, use))
				continue;

			val_used(worklist, use, node);
		}

		/* we might have too many values in the worklist now and need to spill
		 * some */
		make_room(worklist, 0);

#ifdef DEBUG_libfirm
		print_worklist(worklist, LEVEL_2);
		DB((dbg, LEVEL_2, "\n"));
#endif
	}

	update_max_pressure(worklist);

#ifdef DEBUG_libfirm
	DB((dbg, LEVEL_1, "worklist at begin of %+F:", block));
	print_worklist(worklist, LEVEL_1);
	DB((dbg, LEVEL_1, "\n"));
#endif
}
531
532 static bool worklists_equal(const worklist_t *wl1, const worklist_t *wl2)
533 {
534         const struct list_head *i1 = &wl1->live_values;
535         const struct list_head *i2 = &wl2->live_values;
536
537         for ( ; i1 != &wl1->live_values && i2 != &wl2->live_values;
538                         i1 = i1->next, i2 = i2->next) {
539                 worklist_entry_t *entry1 = list_entry(i1, worklist_entry_t, head);
540                 worklist_entry_t *entry2 = list_entry(i2, worklist_entry_t, head);
541
542                 if (entry1->value != entry2->value)
543                         return false;
544         }
545         /* both lists at the end */
546         if (i1 != &wl1->live_values || i2 != &wl2->live_values)
547                 return false;
548
549         return true;
550 }
551
552 static bool fill_start_worklist(worklist_t *new_worklist, ir_node *block)
553 {
554         double           best_execfreq   = -1;
555         worklist_t      *best_worklist   = NULL;
556         ir_node         *best_succ_block = NULL;
557         int              best_pos;
558         const ir_edge_t *edge;
559
560         /* construct worklist */
561         foreach_block_succ(block, edge) {
562                 ir_node      *succ_block = get_edge_src_irn(edge);
563                 double       execfreq    = get_block_execfreq(exec_freq, succ_block);
564                 block_info_t *block_info;
565                 worklist_t   *succ_worklist;
566
567                 if (execfreq < best_execfreq)
568                         continue;
569
570                 block_info    = get_block_info(succ_block);
571                 succ_worklist = block_info->start_worklist;
572
573                 if (succ_worklist == NULL || succ_worklist->visited >= worklist_visited)
574                         continue;
575
576                 best_execfreq   = execfreq;
577                 best_worklist   = succ_worklist;
578                 best_succ_block = succ_block;
579                 best_pos        = get_edge_src_pos(edge);
580         }
581
582         if (best_worklist == NULL)
583                 return false;
584         best_worklist->visited = worklist_visited;
585
586         fill_and_activate_worklist(new_worklist, best_worklist, block,
587                         best_succ_block, best_pos);
588         return true;
589 }
590
591 static worklist_t *construct_start_worklist(ir_node *block)
592 {
593         worklist_t *worklist = new_worklist();
594
595         ++worklist_visited;
596
597         while(fill_start_worklist(worklist, block)) {
598                 if (worklist->n_live_values >= n_regs)
599                         break;
600         }
601
602         return worklist;
603 }
604
/**
 * Block walker: constructs the block's end worklist from its successors,
 * runs the spilling decisions backwards through the block and records the
 * resulting start/end worklists in the block info.
 * In fixpoint-check mode it asserts that the worklists did not change
 * compared to the previous round.
 */
static void process_block(ir_node *block, void *env)
{
	block_info_t    *block_info;
	worklist_t      *worklist;
	int              n_preds;

	(void) env;
	DB((dbg, LEVEL_1, "Processing %+F\n", block));

	worklist   = construct_start_worklist(block);
	block_info = get_block_info(block);

#ifdef DEBUG_libfirm
	DB((dbg, LEVEL_1, "worklist at end of %+F:", block));
	print_worklist(worklist, LEVEL_1);
	DB((dbg, LEVEL_1, "\n"));

	if (should_have_reached_fixpoint) {
		assert(worklists_equal(block_info->end_worklist, worklist));
	}
#endif
	/* keep a copy: do_spilling mutates the worklist in place */
	block_info->end_worklist = duplicate_worklist(worklist);

	do_spilling(block, worklist);
	deactivate_worklist(worklist);

#ifdef DEBUG_libfirm
	if (should_have_reached_fixpoint) {
		assert(worklists_equal(block_info->start_worklist, worklist));
	}
#endif
	block_info->start_worklist = worklist;

	/* we shouldn't have any live values left at the start block */
	/* NOTE: n_preds only feeds the disabled assert below */
	n_preds = get_Block_n_cfgpreds(block);
	//assert(n_preds != 0 || worklist->n_live_values == 0);
}
642
typedef struct block_or_loop_t block_or_loop_t;
/** Work item of the loop-aware processing order: a block or a whole loop. */
struct block_or_loop_t {
	union {
		ir_node *block; /**< valid if !is_loop */
		ir_loop *loop;  /**< valid if is_loop */
	} v;
	bool is_loop; /**< discriminator for the union above */
};

/* flexible array (ARR_F) holding the processing order being constructed */
static block_or_loop_t *loop_blocks;
/* the loop whose blocks are currently being collected */
static ir_loop         *current_loop;

static void find_blocks(ir_node *block);
656
657 static void find_in_loop(ir_loop *loop, ir_node *entry)
658 {
659         loop_info_t     *loop_info = get_loop_info(loop);
660         block_or_loop_t block_or_loop;
661         loop_edge_t     *edge;
662
663         /* simply mark 1 block in the loop to indicate that the loop was already
664          * processed */
665         ir_node *some_block = loop_info->entry_edges->block;
666         if (Block_block_visited(some_block))
667                 return;
668
669         block_or_loop.v.loop  = loop;
670         block_or_loop.is_loop = true;
671         ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);
672
673 #ifndef NDEBUG
674         {
675                 /* we should be 1 of the entry blocks */
676                 loop_edge_t *edge  = loop_info->entry_edges;
677                 bool         found = false;
678                 for ( ; edge != NULL; edge = edge->next) {
679                         if (edge->block == entry)
680                                 found = true;
681                 }
682                 assert(found);
683         }
684 #endif
685         /* check all loop successors */
686         for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
687                 ir_node *succ      = edge->block;
688                 ir_loop *succ_loop = get_irn_loop(succ);
689
690                 if (succ_loop == current_loop) {
691                         find_blocks(succ);
692                 } else {
693                         assert(get_loop_depth(succ_loop) < get_loop_depth(current_loop));
694                 }
695         }
696 }
697
698 static void find_blocks(ir_node *block)
699 {
700         const ir_edge_t *edge;
701         block_or_loop_t block_or_loop;
702
703         if (Block_block_visited(block))
704                 return;
705
706         block_or_loop.v.block = block;
707         block_or_loop.is_loop = false;
708
709         ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);
710         mark_Block_block_visited(block);
711
712         foreach_block_succ(block, edge) {
713                 ir_node *succ = get_edge_src_irn(edge);
714
715                 /* is it part of the current loop? */
716                 ir_loop *loop = get_irn_loop(succ);
717                 if (loop != current_loop) {
718                         /* a sub-loop? */
719                         if (get_loop_depth(loop) > get_loop_depth(current_loop)) {
720                                 find_in_loop(loop, succ);
721                         } else {
722                                 /* parent loop: we're not interested in the block */
723                         }
724                 } else {
725                         find_blocks(succ);
726                 }
727         }
728 }
729
730 /**
731  * append an entry to a worklist. WARNING: The entry must not already be in the
732  * worklist.
733  */
734 static void worklist_append(worklist_t *worklist, ir_node *value,
735                             ir_node *reload_point,
736                                                         ir_loop *unused_livethrough_loop)
737 {
738         worklist_entry_t *entry     = obstack_alloc(&obst, sizeof(entry[0]));
739         memset(entry, 0, sizeof(entry[0]));
740
741 #ifdef EXPENSIVE_CHECKS
742         {
743                 struct list_head *entry;
744                 list_for_each(entry, &worklist->live_values) {
745                         worklist_entry_t *wl_entry
746                                 = list_entry(entry, worklist_entry_t, head);
747                         assert(wl_entry->value != value);
748                 }
749         }
750 #endif
751
752         entry->value                   = value;
753         entry->reload_point            = reload_point;
754         entry->unused_livethrough_loop = unused_livethrough_loop;
755         list_add_tail(&entry->head, &worklist->live_values);
756         ++worklist->n_live_values;
757         assert(worklist->n_live_values <= n_regs);
758 }
759
/**
 * Adds @p value as an unused livethrough of the loop to the end worklists of
 * all exit predecessor blocks and the start worklists of all entry blocks.
 * Each worklist is extended at most once per call, guarded by the
 * worklist_visited timestamp. Increments the loop's recorded pressure.
 */
static void push_unused_livethrough(loop_info_t *loop_info, ir_node *value)
{
	loop_edge_t *edge;
	++worklist_visited;

	/* add the value to all loop exit and entry blocks */
	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
		ir_node            *block
			= get_Block_cfgpred_block(edge->block, edge->pos);
		const block_info_t *info     = get_block_info(block);
		worklist_t         *worklist = info->end_worklist;
		ir_node            *reload_point = NULL;

		if (worklist->visited >= worklist_visited)
			continue;
		worklist->visited = worklist_visited;

		/* TODO: we need a smarter mechanism here, that makes the reloader place
		 * reload nodes on all loop exits... */

		worklist_append(worklist, value, reload_point, loop_info->loop);
	}
	edge = loop_info->entry_edges;
	for ( ; edge != NULL; edge = edge->next) {
		ir_node            *entry_block = edge->block;
		const block_info_t *info        = get_block_info(entry_block);
		worklist_t         *worklist    = info->start_worklist;
		ir_node            *pred_block;
		ir_node            *reload_point;

		if (worklist->visited >= worklist_visited)
			continue;
		worklist->visited = worklist_visited;

		/* the reload happens at the end of the block before the loop entry */
		pred_block   = get_Block_cfgpred_block(entry_block, edge->pos);
		reload_point = be_get_end_of_block_insertion_point(pred_block);

		worklist_append(worklist, value, reload_point, loop_info->loop);
	}

	set_irn_link(value, NULL);
	++loop_info->max_register_pressure;
}
803
/**
 * Detects unused livethrough values of a loop (values live at a loop exit's
 * start worklist but not in the corresponding end worklist outside the loop)
 * and pushes them through the loop while the loop's register pressure
 * permits it.
 */
static void push_unused_livethroughs(loop_info_t *loop_info)
{
	loop_edge_t *edge;

	/* we can only push unused livethroughs if register pressure inside the loop
	 * was low enough */
	if (loop_info->max_register_pressure >= n_regs)
		return;

	/* find unused livethroughs: register pressure in the loop was low enough
	 * which means that we had no spills which implies that at every point in
	 * the loop all*/
	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
		ir_node            *block          = edge->block;
		const block_info_t *info           = get_block_info(block);
		worklist_t         *start_worklist = info->start_worklist;
		ir_node            *exit_block;
		const block_info_t *exit_info;
		worklist_t         *end_worklist;
		struct list_head   *entry;

		if (start_worklist == NULL)
			continue;

		exit_block   = get_Block_cfgpred_block(edge->block, edge->pos);
		exit_info    = get_block_info(exit_block);
		end_worklist = exit_info->end_worklist;

		activate_worklist(end_worklist);
		/* all values contained in the start_worklist, which are not available
		 * in the end_worklist, must be unused livethroughs */

		list_for_each(entry, &start_worklist->live_values) {
			worklist_entry_t *wl_entry
				= list_entry(entry, worklist_entry_t, head);
			ir_node *value = wl_entry->value;

			/* pushing raises the loop pressure; stop at the register limit */
			if (loop_info->max_register_pressure >= n_regs)
				break;


			if (!worklist_contains(value)) {
				/* add it to all loop exits */
				DB((dbg, LEVEL_2, "Found unused livethrough: %+F (regpressure in loop %d)\n", value, loop_info->max_register_pressure));
				push_unused_livethrough(loop_info, value);
				/* the value should now be part of end_worklist, so mark it */
				mark_irn_visited(value);
			}
		}

		deactivate_worklist(end_worklist);
	}
}
857
858 static void process_block_or_loop(const block_or_loop_t *block_or_loop)
859 {
860         if (block_or_loop->is_loop) {
861                 loop_info_t *loop_info = get_loop_info(block_or_loop->v.loop);
862
863                 if (do_push_unused_livethroughs)
864                         push_unused_livethroughs(loop_info);
865
866                 if (loop_info->max_register_pressure > max_register_pressure)
867                         max_register_pressure = loop_info->max_register_pressure;
868
869                 return;
870         }
871         process_block(block_or_loop->v.block, NULL);
872 }
873
874 static void process_loop(ir_loop *loop)
875 {
876         int         n_elements = get_loop_n_elements(loop);
877         int         i, len;
878         loop_info_t *loop_info;
879         loop_edge_t *edge;
880         ir_node     *some_block;
881
882         /* first handle all sub-loops */
883         for (i = 0; i < n_elements; ++i) {
884                 loop_element element = get_loop_element(loop, i);
885                 if (*element.kind != k_ir_loop)
886                         continue;
887
888                 process_loop(element.son);
889         }
890
891         /* create a postorder of the blocks */
892         loop_info = get_loop_info(loop);
893         edge      = loop_info->entry_edges;
894         if (edge != NULL) {
895                 some_block = edge->block;
896         } else {
897                 assert(loop == get_irg_loop(current_ir_graph));
898                 some_block = get_irg_start_block(current_ir_graph);
899         }
900
901         loop_blocks  = NEW_ARR_F(block_or_loop_t,0);
902         current_loop = loop;
903
904         ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);
905         inc_irg_block_visited(current_ir_graph);
906         find_blocks(some_block);
907         /* for endless loops the end-block might be unreachable */
908         if (loop == get_irg_loop(current_ir_graph)) {
909                 find_blocks(get_irg_end_block(current_ir_graph));
910         }
911         ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);
912
913         DB((dbg, LEVEL_3, "Block List for loop %p\n", loop));
914         len = ARR_LEN(loop_blocks);
915         for (i = 0; i < len; ++i) {
916                 block_or_loop_t *block_or_loop = &loop_blocks[i];
917                 if (block_or_loop->is_loop) {
918                         DB((dbg, LEVEL_3, " L-%p", block_or_loop->v.loop));
919                 } else {
920                         DB((dbg, LEVEL_3, " B-%+F", block_or_loop->v.block));
921                 }
922         }
923         DB((dbg, LEVEL_3, "\n"));
924
925         max_register_pressure = 0;
926
927         /* run1: tentative phase */
928         tentative_mode = true;
929         for (i = len-1; i >= 0; --i) {
930                 process_block_or_loop(&loop_blocks[i]);
931         }
932
933         /* run2: tentative phase - should reach fixpoint */
934         tentative_mode = true;
935         for (i = len-1; i >= 0; --i) {
936                 process_block_or_loop(&loop_blocks[i]);
937         }
938
939 #ifndef NDEBUG
940         /* run3: tentative phase - check fixpoint */
941         tentative_mode               = true;
942         should_have_reached_fixpoint = true;
943         for (i = len-1; i >= 0; --i) {
944                 process_block_or_loop(&loop_blocks[i]);
945         }
946         should_have_reached_fixpoint = false;
947 #endif
948
949         /* run4: add spills/reloads */
950         tentative_mode              = false;
951         do_push_unused_livethroughs = true;
952         for (i = len-1; i >= 0; --i) {
953                 process_block_or_loop(&loop_blocks[i]);
954         }
955         do_push_unused_livethroughs = false;
956
957         loop_info->max_register_pressure = max_register_pressure;
958         DB((dbg, LEVEL_2, "Regpressure in loop %p: %u\n", loop,
959                                 (unsigned) max_register_pressure));
960
961         DEL_ARR_F(loop_blocks);
962 }
963
964 static void fix_block_borders(ir_node *block, void *data)
965 {
966         block_info_t *block_info     = get_block_info(block);
967         worklist_t   *start_worklist = block_info->start_worklist;
968         int           n_cfgpreds     = get_Block_n_cfgpreds(block);
969         ir_loop      *loop           = get_irn_loop(block);
970         int           i;
971
972         (void) data;
973         assert(start_worklist != NULL);
974
975         for (i = 0; i < n_cfgpreds; ++i) {
976                 ir_node      *pred_block      = get_Block_cfgpred_block(block, i);
977                 block_info_t *pred_block_info = get_block_info(pred_block);
978                 worklist_t   *end_worklist    = pred_block_info->end_worklist;
979                 ir_loop      *pred_loop       = get_irn_loop(pred_block);
980                 bool          is_loop_entry   = false;
981                 struct list_head *entry;
982
983                 assert(end_worklist != NULL);
984
985                 if (get_loop_depth(pred_loop) < get_loop_depth(loop)) {
986                         is_loop_entry = true;
987                 }
988
989                 /* reload missing values */
990                 activate_worklist(end_worklist);
991
992                 list_for_each(entry, &start_worklist->live_values) {
993                         worklist_entry_t *wl_entry
994                                 = list_entry(entry, worklist_entry_t, head);
995                         ir_node          *value = wl_entry->value;
996
997                         if (is_Phi(value) && get_nodes_block(value) == block) {
998                                 value = get_irn_n(value, i);
999
1000                                 /* we might have unknowns as argument for the phi */
1001                                 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, value))
1002                                         continue;
1003                         }
1004
1005                         if (worklist_contains(value))
1006                                 continue;
1007                         if (wl_entry->unused_livethrough_loop != NULL && !is_loop_entry)
1008                                 continue;
1009
1010                         be_add_reload_on_edge(senv, value, block, i, cls, 1);
1011                 }
1012
1013                 deactivate_worklist(end_worklist);
1014         }
1015 }
1016
1017 static void be_spill_belady3(be_irg_t *birg, const arch_register_class_t *ncls)
1018 {
1019         ir_graph *irg = be_get_birg_irg(birg);
1020
1021         cls    = ncls;
1022         n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
1023
1024         /* shortcut for register classes with ignore regs only */
1025         if (n_regs == 0)
1026                 return;
1027
1028         worklist_visited = 0;
1029         arch_env         = be_get_birg_arch_env(birg);
1030         exec_freq        = be_get_birg_exec_freq(birg);
1031
1032         be_clear_links(irg);
1033         ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
1034                         | IR_RESOURCE_LOOP_LINK);
1035         inc_irg_visited(irg);
1036
1037         obstack_init(&obst);
1038         senv = be_new_spill_env(birg);
1039
1040         assure_cf_loop(irg);
1041         clear_loop_info(get_irg_loop(irg));
1042         irg_block_walk_graph(irg, construct_loop_edges, NULL, NULL);
1043
1044         process_loop(get_irg_loop(current_ir_graph));
1045
1046         irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);
1047
1048         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
1049                         | IR_RESOURCE_LOOP_LINK);
1050
1051         be_insert_spills_reloads(senv);
1052
1053         obstack_free(&obst, NULL);
1054
1055         /* clean up */
1056         be_delete_spill_env(senv);
1057 }
1058
1059 void be_init_spillbelady3(void)
1060 {
1061         static be_spiller_t belady3_spiller = {
1062                 be_spill_belady3
1063         };
1064
1065         be_register_spiller("belady3", &belady3_spiller);
1066         FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady3");
1067 }
1068
1069 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady3);