Remove the unused parameter const arch_env_t *env from arch_irn_get_flags(), arch_irn...
[libfirm] / ir / be / bespillbelady3.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       MIN-algorithm with some twists (aka belady spiller v3)
23  * @author      Matthias Braun
24  * @date        15.01.2008
25  * @version     $Id$
26  *
27  * TODO:
28  *   - handle phis correctly, decide whether we should spill them ~ok
29  *   - merge multiple start worksets of blocks ~ok
30  *   - how to and when to do the tentative phase...
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include <stdbool.h>
37
38 #include "debug.h"
39 #include "list.h"
40 #include "pdeq.h"
41
42 #include "irnode_t.h"
43 #include "irprintf.h"
44 #include "iredges_t.h"
45 #include "irloop_t.h"
46 #include "irgwalk.h"
47 #include "execfreq.h"
48
49 #include "bemodule.h"
50 #include "bespill.h"
51 #include "beutil.h"
52 #include "bespilloptions.h"
53 #include "besched_t.h"
54 #include "be_t.h"
55
56 #ifndef NDEBUG
57 #define EXPENSIVE_CHECKS
58 #endif
59
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

typedef struct loop_edge_t loop_edge_t;
/** A single control-flow edge entering or leaving a loop, identified by
 *  the target block and the cfg-predecessor position.  Edges are kept in
 *  a singly linked list per loop. */
struct loop_edge_t {
	ir_node     *block;
	int          pos;
	loop_edge_t *next;
};

typedef struct loop_info_t loop_info_t;
/** Per-loop data: entry/exit edge lists and the maximum register
 *  pressure observed inside the loop (see construct_loop_edges and
 *  process_block_or_loop). */
struct loop_info_t {
	ir_loop     *loop;
	loop_edge_t *entry_edges;
	loop_edge_t *exit_edges;
	size_t       max_register_pressure;
};

typedef struct worklist_entry_t worklist_entry_t;
/** One live value tracked by a worklist.  Linked into
 *  worklist_t::live_values; reload_point is the node before which a
 *  reload has to be placed if the value gets evicted. */
struct worklist_entry_t {
	struct list_head  head;
	ir_node          *value;
	ir_node          *reload_point;
	ir_loop          *unused_livethrough_loop;
};

typedef struct worklist_t worklist_t;
/** A Belady-style workset: list of currently live values (least recently
 *  used at the front, see make_room), its length, and a visited counter
 *  compared against the global worklist_visited to avoid processing a
 *  worklist twice. */
struct worklist_t {
	struct list_head  live_values;
	size_t            n_live_values;
	ir_visited_t      visited;
};

typedef struct block_info_t block_info_t;
/** Worklists at the start and end of a block (attached via the irn link
 *  field, see get_block_info). */
struct block_info_t {
	worklist_t *start_worklist;
	worklist_t *end_worklist;
};

/* pass-global state, set up once per spill run */
static const arch_register_class_t *cls;    /* register class being processed */
static struct obstack               obst;   /* allocator for all pass data */
static spill_env_t                 *senv;   /* spill environment (be_add_reload etc.) */
static size_t                       n_regs; /* number of available registers */
static size_t                       max_register_pressure;
static bool                         tentative_mode; /* measure only, place no spills/reloads */
static bool                         should_have_reached_fixpoint;
static bool                         do_push_unused_livethroughs;
static ir_exec_freq                *exec_freq;
static ir_visited_t                 worklist_visited;
108
109 static worklist_t *new_worklist(void)
110 {
111         worklist_t *worklist = obstack_alloc(&obst, sizeof(worklist[0]));
112         memset(worklist, 0, sizeof(worklist[0]));
113
114         INIT_LIST_HEAD(&worklist->live_values);
115         worklist->n_live_values    = 0;
116
117         return worklist;
118 }
119
/**
 * Mark @p node as "not visited" by setting its visited counter to one
 * below the graph's current visited counter.
 */
static void mark_irn_not_visited(ir_node *node)
{
	set_irn_visited(node, get_irg_visited(current_ir_graph) - 1);
}
124
/**
 * Returns true if @p node is in the currently active worklist.
 * The node's visited flag doubles as the membership marker (see
 * activate_worklist/deactivate_worklist).
 */
static bool worklist_contains(const ir_node *node)
{
	return irn_visited(node);
}
129
130 static block_info_t *get_block_info(ir_node *block)
131 {
132         block_info_t *info = get_irn_link(block);
133         if (info != NULL)
134                 return info;
135
136         info = obstack_alloc(&obst, sizeof(info[0]));
137         memset(info, 0, sizeof(info[0]));
138         set_irn_link(block, info);
139         return info;
140 }
141
142 static void deactivate_worklist(const worklist_t *worklist)
143 {
144         struct list_head *entry;
145
146         list_for_each(entry, &worklist->live_values) {
147                 worklist_entry_t *wl_entry
148                         = list_entry(entry, worklist_entry_t, head);
149                 assert(worklist_contains(wl_entry->value));
150                 mark_irn_not_visited(wl_entry->value);
151                 set_irn_link(wl_entry->value, NULL);
152         }
153 }
154
155 static void activate_worklist(const worklist_t *worklist)
156 {
157         struct list_head *entry;
158
159         list_for_each(entry, &worklist->live_values) {
160                 worklist_entry_t *wl_entry
161                         = list_entry(entry, worklist_entry_t, head);
162                 assert(!worklist_contains(wl_entry->value));
163                 mark_irn_visited(wl_entry->value);
164                 set_irn_link(wl_entry->value, wl_entry);
165         }
166 }
167
168 /**
169  * duplicate a worklist and directly activates it
170  */
171 static void fill_and_activate_worklist(worklist_t *new_worklist,
172                 const worklist_t *worklist, ir_node *block, ir_node *succ_block,
173                 int succ_pos)
174 {
175         ir_node          *reload_point  = NULL;
176         size_t            n_live_values = 0;
177         struct list_head *entry;
178
179         if (succ_block != NULL &&
180                         (get_Block_n_cfgpreds(succ_block) > 1
181                          || get_irn_n_edges_kind(block, EDGE_KIND_BLOCK) > 1)) {
182                 reload_point = be_get_end_of_block_insertion_point(block);
183         }
184
185         list_for_each(entry, &worklist->live_values) {
186                 worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
187                 ir_node          *value     = wl_entry->value;
188                 worklist_entry_t *new_entry;
189
190                 if (new_worklist->n_live_values >= n_regs)
191                         break;
192
193                 if (is_Phi(value) && get_nodes_block(value) == succ_block) {
194                         value = get_Phi_pred(value, succ_pos);
195
196                         /* can happen for unknown phi preds */
197                         if (!arch_irn_consider_in_reg_alloc(cls, value))
198                                 continue;
199                 }
200
201                 if (irn_visited_else_mark(value))
202                         continue;
203
204                 new_entry = obstack_alloc(&obst, sizeof(new_entry[0]));
205                 memset(new_entry, 0, sizeof(new_entry[0]));
206
207                 new_entry->value = value;
208                 if (reload_point != NULL) {
209                         new_entry->reload_point = reload_point;
210                 } else {
211                         new_entry->reload_point = wl_entry->reload_point;
212                 }
213
214                 list_add_tail(&new_entry->head, &new_worklist->live_values);
215                 ++n_live_values;
216
217                 set_irn_link(value, new_entry);
218                 new_worklist->n_live_values++;
219         }
220 }
221
222 static worklist_t *duplicate_worklist(const worklist_t *worklist)
223 {
224         worklist_t       *new_worklist;
225         struct list_head *entry;
226
227         new_worklist = obstack_alloc(&obst, sizeof(new_worklist[0]));
228         memset(new_worklist, 0, sizeof(new_worklist[0]));
229         INIT_LIST_HEAD(&new_worklist->live_values);
230         new_worklist->n_live_values = worklist->n_live_values;
231
232         list_for_each(entry, &worklist->live_values) {
233                 worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
234                 worklist_entry_t *new_entry
235                         = obstack_alloc(&obst, sizeof(new_entry[0]));
236
237                 memcpy(new_entry, wl_entry, sizeof(new_entry[0]));
238                 list_add_tail(&new_entry->head, &new_worklist->live_values);
239         }
240
241         return new_worklist;
242 }
243
#ifdef DEBUG_libfirm
/**
 * Dump the contents of @p worklist to the debug output at @p level.
 *
 * Fix: n_live_values is a size_t, which does not match the %d
 * conversion specifier (undefined vararg behavior on LP64); cast to
 * int for a correct, portable argument type.
 */
static void print_worklist(const worklist_t *worklist, int level)
{
	struct list_head *entry;

	DB((dbg, level, "%d values: ", (int) worklist->n_live_values));
	list_for_each(entry, &worklist->live_values) {
		worklist_entry_t *wl_entry
			= list_entry(entry, worklist_entry_t, head);

		DB((dbg, level, "%+F ", wl_entry->value));
	}
}
#endif
258
259
260
261 static void clear_loop_info(ir_loop *loop)
262 {
263         int n_elements = get_loop_n_elements(loop);
264         int i;
265
266         loop->link = NULL;
267         for (i = 0; i < n_elements; ++i) {
268                 loop_element element = get_loop_element(loop, i);
269                 if (*element.kind != k_ir_loop)
270                         continue;
271
272                 clear_loop_info(element.son);
273         }
274 }
275
276 static loop_info_t *get_loop_info(ir_loop *loop)
277 {
278         loop_info_t *info = loop->link;
279         if (info != NULL)
280                 return info;
281
282         info = obstack_alloc(&obst, sizeof(info[0]));
283         memset(info, 0, sizeof(info[0]));
284         info->loop = loop;
285         loop->link = info;
286         return info;
287 }
288
/**
 * constructs loop in+out edges when called in a block walker
 *
 * For every cfg edge whose endpoints lie in different loops, the edge is
 * registered as entry or exit edge on every loop level that is crossed.
 */
static void construct_loop_edges(ir_node *block, void *data)
{
	int      n_cfgpreds = get_Block_n_cfgpreds(block);
	ir_loop *loop       = get_irn_loop(block);
	int      i;

	(void) data;

	/* examine every control-flow edge ending at this block */
	for (i = 0; i < n_cfgpreds; ++i) {
		ir_node     *cfgpred_block = get_Block_cfgpred_block(block, i);
		ir_loop     *cfgpred_loop  = get_irn_loop(cfgpred_block);
		loop_edge_t *edge;
		bool        is_exit_edge;
		ir_loop     *l, *goal;

		/* edge stays within one loop: nothing to record */
		if (cfgpred_loop == loop)
			continue;

		/* critical edges are splitted, so we can't jump from 1 loop
		 * directly into another without going out of it */
		assert(get_loop_depth(cfgpred_loop) != get_loop_depth(loop));

		/* edge out of loop */
		if (get_loop_depth(cfgpred_loop) > get_loop_depth(loop)) {
			is_exit_edge = true;
			l            = cfgpred_loop;
			goal         = loop;
		} else {
			is_exit_edge = false;
			l            = loop;
			goal         = cfgpred_loop;
		}

		/* this might be a jump out/in multiple loops: register the edge at
		 * every loop level from the deeper loop l up to (excluding) goal */
		do {
			loop_info_t *l_info = get_loop_info(l);

			edge = obstack_alloc(&obst, sizeof(edge[0]));
			memset(edge, 0, sizeof(edge[0]));
			edge->block = block;
			edge->pos   = i;

			if (is_exit_edge) {
				edge->next         = l_info->exit_edges;
				l_info->exit_edges = edge;
				assert(get_loop_depth(loop) < get_loop_depth(l));
			} else {
				edge->next          = l_info->entry_edges;
				l_info->entry_edges = edge;
				assert(get_loop_depth(cfgpred_loop) < get_loop_depth(l));
			}
			l = get_loop_outer_loop(l);
		} while(l != goal);
	}
}
347
348
349
350
351 static void place_reload(worklist_entry_t *entry)
352 {
353         if (tentative_mode)
354                 return;
355
356         DB((dbg, LEVEL_1, "reload of %+F before %+F\n", entry->value,
357                 entry->reload_point));
358         assert(entry->reload_point != NULL);
359         be_add_reload(senv, entry->value, entry->reload_point, cls, 1);
360         entry->reload_point = NULL;
361 }
362
363 /**
364  * makes sure the worklist contains not more than n_regs - room_needed entries
365  */
366 static void make_room(worklist_t *worklist, size_t room_needed)
367 {
368         int               i;
369         int               spills_needed;
370         struct list_head *entry;
371
372         spills_needed = worklist->n_live_values + room_needed - n_regs;
373         if (spills_needed <= 0)
374                 return;
375
376         entry = worklist->live_values.next;
377         for(i = spills_needed; i > 0; --i) {
378                 struct list_head *next = entry->next;
379                 worklist_entry_t *wl_entry
380                         = list_entry(entry, worklist_entry_t, head);
381
382                 assert(worklist_contains(wl_entry->value));
383                 mark_irn_not_visited(wl_entry->value);
384                 place_reload(wl_entry);
385                 list_del(entry);
386
387                 entry = next;
388         }
389         worklist->n_live_values -= spills_needed;
390 }
391
392 /**
393  * a value was used, so bring it to the back of the worklist (which might
394  * result in a spill of another value).
395  */
396 static void val_used(worklist_t *worklist, ir_node *value, ir_node *sched_point)
397 {
398         /* already in the worklist? move around, otherwise add at back */
399         worklist_entry_t *entry = get_irn_link(value);
400
401         assert(arch_irn_consider_in_reg_alloc(cls, value));
402
403         if (worklist_contains(value)) {
404                 assert(entry != NULL);
405
406                 list_del(&entry->head);
407         } else {
408                 if (entry == NULL) {
409                         entry        = obstack_alloc(&obst, sizeof(entry[0]));
410                         memset(entry, 0, sizeof(entry[0]));
411
412                         entry->value = value;
413                         set_irn_link(value, entry);
414                 }
415
416                 ++worklist->n_live_values;
417                 mark_irn_visited(value);
418         }
419
420         entry->reload_point = sched_point;
421         list_add_tail(&entry->head, &worklist->live_values);
422 }
423
424 static void worklist_remove(worklist_t *worklist, ir_node *value)
425 {
426         worklist_entry_t *entry = get_irn_link(value);
427         assert(entry != NULL);
428         list_del(&entry->head);
429         --worklist->n_live_values;
430
431         assert(worklist_contains(value));
432         mark_irn_not_visited(value);
433 }
434
435 static void update_max_pressure(worklist_t *worklist)
436 {
437         if (worklist->n_live_values > max_register_pressure)
438                 max_register_pressure = worklist->n_live_values;
439 }
440
/**
 * Run the Belady/MIN decision loop over one block: walk the schedule
 * backwards, keep the worklist at most n_regs entries big, evict (and
 * thereby spill/reload) the least recently used values when it
 * overflows, and spill phis that are not live at the block start.
 *
 * @param worklist  the workset at the end of @p block; transformed into
 *                  the workset at the start of the block
 */
static void do_spilling(ir_node *block, worklist_t *worklist)
{
	ir_node *node;

	assert(worklist != NULL);

	sched_foreach_reverse(block, node) {
		int    i, arity;
		size_t n_defs = 0;

		DB((dbg, LEVEL_2, "\t%+F... ", node));
		update_max_pressure(worklist);

		/* phis are all scheduled at the block start: once we reach one we
		 * only have phis left */
		if (is_Phi(node)) {
			ir_node *node2;
			/* TODO: if we have some free registers, then we could decide to
			 * not spill some phis (but not for phis where at least 1 input is
			 * themselfes) */

			/* we have to spill all phis that are not live */
			sched_foreach_reverse_from(node, node2) {
				assert(is_Phi(node2));

				if (worklist_contains(node2))
					continue;
				if (!arch_irn_consider_in_reg_alloc(cls, node2))
					continue;

				if (!tentative_mode)
					be_spill_phi(senv, node2);
			}
			DB((dbg, LEVEL_2, "\n"));
			break;
		}

		/* remove values defined by this instruction from the workset. Values
		 * defined but not in the workset need free registers */
		if (get_irn_mode(node) == mode_T) {
			const ir_edge_t *edge;

			/* mode_T: the real results are the Projs */
			foreach_out_edge(node, edge) {
				ir_node *proj = get_edge_src_irn(edge);
				if (!arch_irn_consider_in_reg_alloc(cls, proj))
					continue;
				if (worklist_contains(proj)) {
					worklist_remove(worklist, proj);
				} else {
					++n_defs;
				}
			}
		} else if (arch_irn_consider_in_reg_alloc(cls, node)) {
			if (worklist_contains(node)) {
				worklist_remove(worklist, node);
			} else {
				n_defs = 1;
			}
		}

		/* make sure we have enough free registers for the spills */
		make_room(worklist, n_defs);

		/* put all values used by the instruction into the workset */
		arity = get_irn_arity(node);
		for(i = 0; i < arity; ++i) {
			ir_node *use = get_irn_n(node, i);

			if (!arch_irn_consider_in_reg_alloc(cls, use))
				continue;

			val_used(worklist, use, node);
		}

		/* we might have too many values in the worklist now and need to spill
		 * some */
		make_room(worklist, 0);

#ifdef DEBUG_libfirm
		print_worklist(worklist, LEVEL_2);
		DB((dbg, LEVEL_2, "\n"));
#endif
	}

	update_max_pressure(worklist);

#ifdef DEBUG_libfirm
	DB((dbg, LEVEL_1, "worklist at begin of %+F:", block));
	print_worklist(worklist, LEVEL_1);
	DB((dbg, LEVEL_1, "\n"));
#endif
}
531
532 static bool worklists_equal(const worklist_t *wl1, const worklist_t *wl2)
533 {
534         const struct list_head *i1 = &wl1->live_values;
535         const struct list_head *i2 = &wl2->live_values;
536
537         for ( ; i1 != &wl1->live_values && i2 != &wl2->live_values;
538                         i1 = i1->next, i2 = i2->next) {
539                 worklist_entry_t *entry1 = list_entry(i1, worklist_entry_t, head);
540                 worklist_entry_t *entry2 = list_entry(i2, worklist_entry_t, head);
541
542                 if (entry1->value != entry2->value)
543                         return false;
544         }
545         /* both lists at the end */
546         if (i1 != &wl1->live_values || i2 != &wl2->live_values)
547                 return false;
548
549         return true;
550 }
551
552 static bool fill_start_worklist(worklist_t *new_worklist, ir_node *block)
553 {
554         double           best_execfreq   = -1;
555         worklist_t      *best_worklist   = NULL;
556         ir_node         *best_succ_block = NULL;
557         int              best_pos;
558         const ir_edge_t *edge;
559
560         /* construct worklist */
561         foreach_block_succ(block, edge) {
562                 ir_node      *succ_block = get_edge_src_irn(edge);
563                 double       execfreq    = get_block_execfreq(exec_freq, succ_block);
564                 block_info_t *block_info;
565                 worklist_t   *succ_worklist;
566
567                 if (execfreq < best_execfreq)
568                         continue;
569
570                 block_info    = get_block_info(succ_block);
571                 succ_worklist = block_info->start_worklist;
572
573                 if (succ_worklist == NULL || succ_worklist->visited >= worklist_visited)
574                         continue;
575
576                 best_execfreq   = execfreq;
577                 best_worklist   = succ_worklist;
578                 best_succ_block = succ_block;
579                 best_pos        = get_edge_src_pos(edge);
580         }
581
582         if (best_worklist == NULL)
583                 return false;
584         best_worklist->visited = worklist_visited;
585
586         fill_and_activate_worklist(new_worklist, best_worklist, block,
587                         best_succ_block, best_pos);
588         return true;
589 }
590
591 static worklist_t *construct_start_worklist(ir_node *block)
592 {
593         worklist_t *worklist = new_worklist();
594
595         ++worklist_visited;
596
597         while(fill_start_worklist(worklist, block)) {
598                 if (worklist->n_live_values >= n_regs)
599                         break;
600         }
601
602         return worklist;
603 }
604
605 static void process_block(ir_node *block, void *env)
606 {
607         block_info_t    *block_info;
608         worklist_t      *worklist;
609         int              n_preds;
610
611         (void) env;
612         DB((dbg, LEVEL_1, "Processing %+F\n", block));
613
614         worklist   = construct_start_worklist(block);
615         block_info = get_block_info(block);
616
617 #ifdef DEBUG_libfirm
618         DB((dbg, LEVEL_1, "worklist at end of %+F:", block));
619         print_worklist(worklist, LEVEL_1);
620         DB((dbg, LEVEL_1, "\n"));
621
622         if (should_have_reached_fixpoint) {
623                 assert(worklists_equal(block_info->end_worklist, worklist));
624         }
625 #endif
626         block_info->end_worklist = duplicate_worklist(worklist);
627
628         do_spilling(block, worklist);
629         deactivate_worklist(worklist);
630
631 #ifdef DEBUG_libfirm
632         if (should_have_reached_fixpoint) {
633                 assert(worklists_equal(block_info->start_worklist, worklist));
634         }
635 #endif
636         block_info->start_worklist = worklist;
637
638         /* we shouldn't have any live values left at the start block */
639         n_preds = get_Block_n_cfgpreds(block);
640         //assert(n_preds != 0 || worklist->n_live_values == 0);
641 }
642
typedef struct block_or_loop_t block_or_loop_t;
/** Tagged union: an element of the loop_blocks list is either a plain
 *  block or a whole (sub-)loop. */
struct block_or_loop_t {
	union {
		ir_node *block;
		ir_loop *loop;
	} v;
	bool is_loop;
};

/* flexible array holding the blocks/sub-loops of the loop currently
 * being processed, in the order they were discovered */
static block_or_loop_t *loop_blocks;
/* the loop whose body is currently being collected */
static ir_loop         *current_loop;

static void find_blocks(ir_node *block);
656
657 static void find_in_loop(ir_loop *loop, ir_node *entry)
658 {
659         loop_info_t     *loop_info = get_loop_info(loop);
660         block_or_loop_t block_or_loop;
661         loop_edge_t     *edge;
662
663         /* simply mark 1 block in the loop to indicate that the loop was already
664          * processed */
665         ir_node *some_block = loop_info->entry_edges->block;
666         if (Block_block_visited(some_block))
667                 return;
668
669         block_or_loop.v.loop  = loop;
670         block_or_loop.is_loop = true;
671         ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);
672
673 #ifndef NDEBUG
674         {
675                 /* we should be 1 of the entry blocks */
676                 loop_edge_t *edge  = loop_info->entry_edges;
677                 bool         found = false;
678                 for ( ; edge != NULL; edge = edge->next) {
679                         if (edge->block == entry)
680                                 found = true;
681                 }
682                 assert(found);
683         }
684 #else
685         (void) entry;
686 #endif
687
688         /* check all loop successors */
689         for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
690                 ir_node *succ      = edge->block;
691                 ir_loop *succ_loop = get_irn_loop(succ);
692
693                 if (succ_loop == current_loop) {
694                         find_blocks(succ);
695                 } else {
696                         assert(get_loop_depth(succ_loop) < get_loop_depth(current_loop));
697                 }
698         }
699 }
700
701 static void find_blocks(ir_node *block)
702 {
703         const ir_edge_t *edge;
704         block_or_loop_t block_or_loop;
705
706         if (Block_block_visited(block))
707                 return;
708
709         block_or_loop.v.block = block;
710         block_or_loop.is_loop = false;
711
712         ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);
713         mark_Block_block_visited(block);
714
715         foreach_block_succ(block, edge) {
716                 ir_node *succ = get_edge_src_irn(edge);
717
718                 /* is it part of the current loop? */
719                 ir_loop *loop = get_irn_loop(succ);
720                 if (loop != current_loop) {
721                         /* a sub-loop? */
722                         if (get_loop_depth(loop) > get_loop_depth(current_loop)) {
723                                 find_in_loop(loop, succ);
724                         } else {
725                                 /* parent loop: we're not interested in the block */
726                         }
727                 } else {
728                         find_blocks(succ);
729                 }
730         }
731 }
732
733 /**
734  * append an entry to a worklist. WARNING: The entry must not already be in the
735  * worklist.
736  */
737 static void worklist_append(worklist_t *worklist, ir_node *value,
738                             ir_node *reload_point,
739                                                         ir_loop *unused_livethrough_loop)
740 {
741         worklist_entry_t *entry     = obstack_alloc(&obst, sizeof(entry[0]));
742         memset(entry, 0, sizeof(entry[0]));
743
744 #ifdef EXPENSIVE_CHECKS
745         {
746                 struct list_head *entry;
747                 list_for_each(entry, &worklist->live_values) {
748                         worklist_entry_t *wl_entry
749                                 = list_entry(entry, worklist_entry_t, head);
750                         assert(wl_entry->value != value);
751                 }
752         }
753 #endif
754
755         entry->value                   = value;
756         entry->reload_point            = reload_point;
757         entry->unused_livethrough_loop = unused_livethrough_loop;
758         list_add_tail(&entry->head, &worklist->live_values);
759         ++worklist->n_live_values;
760         assert(worklist->n_live_values <= n_regs);
761 }
762
/**
 * Push the unused livethrough @p value out of the loop: insert it into
 * the end worklists of the blocks before the loop's exit edges and into
 * the start worklists of all entry blocks, then account for the extra
 * register it now occupies inside the loop.
 */
static void push_unused_livethrough(loop_info_t *loop_info, ir_node *value)
{
	loop_edge_t *edge;
	++worklist_visited;

	/* add the value to all loop exit and entry blocks */
	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
		ir_node            *block
			= get_Block_cfgpred_block(edge->block, edge->pos);
		const block_info_t *info     = get_block_info(block);
		worklist_t         *worklist = info->end_worklist;
		ir_node            *reload_point = NULL;

		/* every worklist receives the value at most once */
		if (worklist->visited >= worklist_visited)
			continue;
		worklist->visited = worklist_visited;

		/* TODO: we need a smarter mechanism here, that makes the reloader place
		 * reload nodes on all loop exits... */

		worklist_append(worklist, value, reload_point, loop_info->loop);
	}
	edge = loop_info->entry_edges;
	for ( ; edge != NULL; edge = edge->next) {
		ir_node            *entry_block = edge->block;
		const block_info_t *info        = get_block_info(entry_block);
		worklist_t         *worklist    = info->start_worklist;
		ir_node            *pred_block;
		ir_node            *reload_point;

		if (worklist->visited >= worklist_visited)
			continue;
		worklist->visited = worklist_visited;

		/* reload at the end of the block preceding the loop entry */
		pred_block   = get_Block_cfgpred_block(entry_block, edge->pos);
		reload_point = be_get_end_of_block_insertion_point(pred_block);

		worklist_append(worklist, value, reload_point, loop_info->loop);
	}

	/* detach the value; it occupies one more register in the loop now */
	set_irn_link(value, NULL);
	++loop_info->max_register_pressure;
}
806
/**
 * Find values that live through the whole loop without being used in it
 * (present in a start worklist at a loop exit but not in the matching
 * end worklist outside the loop) and push them out of the loop while
 * free registers remain.
 */
static void push_unused_livethroughs(loop_info_t *loop_info)
{
	loop_edge_t *edge;

	/* we can only push unused livethroughs if register pressure inside the loop
	 * was low enough */
	if (loop_info->max_register_pressure >= n_regs)
		return;

	/* find unused livethroughs: register pressure in the loop was low enough
	 * which means that we had no spills which implies that at every point in
	 * the loop all*/
	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
		ir_node            *block          = edge->block;
		const block_info_t *info           = get_block_info(block);
		worklist_t         *start_worklist = info->start_worklist;
		ir_node            *exit_block;
		const block_info_t *exit_info;
		worklist_t         *end_worklist;
		struct list_head   *entry;

		if (start_worklist == NULL)
			continue;

		/* the block outside the loop this exit edge comes from */
		exit_block   = get_Block_cfgpred_block(edge->block, edge->pos);
		exit_info    = get_block_info(exit_block);
		end_worklist = exit_info->end_worklist;

		activate_worklist(end_worklist);
		/* all values contained in the start_worklist, which are not available
		 * in the end_worklist, must be unused livethroughs */

		list_for_each(entry, &start_worklist->live_values) {
			worklist_entry_t *wl_entry
				= list_entry(entry, worklist_entry_t, head);
			ir_node *value = wl_entry->value;

			/* stop as soon as the loop has no free register left */
			if (loop_info->max_register_pressure >= n_regs)
				break;


			if (!worklist_contains(value)) {
				/* add it to all loop exits */
				DB((dbg, LEVEL_2, "Found unused livethrough: %+F (regpressure in loop %d)\n", value, loop_info->max_register_pressure));
				push_unused_livethrough(loop_info, value);
				/* the value should now be part of end_worklist, so mark it */
				mark_irn_visited(value);
			}
		}

		deactivate_worklist(end_worklist);
	}
}
860
861 static void process_block_or_loop(const block_or_loop_t *block_or_loop)
862 {
863         if (block_or_loop->is_loop) {
864                 loop_info_t *loop_info = get_loop_info(block_or_loop->v.loop);
865
866                 if (do_push_unused_livethroughs)
867                         push_unused_livethroughs(loop_info);
868
869                 if (loop_info->max_register_pressure > max_register_pressure)
870                         max_register_pressure = loop_info->max_register_pressure;
871
872                 return;
873         }
874         process_block(block_or_loop->v.block, NULL);
875 }
876
/**
 * Process a loop: recurse into all sub-loops first, collect the loop's own
 * blocks (and already-processed inner loops) in postorder, then run several
 * backward passes over that list: tentative passes that should drive the
 * block start/end worksets to a fixpoint, an NDEBUG-only pass that checks
 * the fixpoint was reached, and a final pass that actually records
 * spills/reloads.  The maximum register pressure seen is stored in the
 * loop's loop_info so enclosing loops can use it.
 */
static void process_loop(ir_loop *loop)
{
	int         n_elements = get_loop_n_elements(loop);
	int         i, len;
	loop_info_t *loop_info;
	loop_edge_t *edge;
	ir_node     *some_block;

	/* first handle all sub-loops */
	for (i = 0; i < n_elements; ++i) {
		loop_element element = get_loop_element(loop, i);
		if (*element.kind != k_ir_loop)
			continue;

		process_loop(element.son);
	}

	/* create a postorder of the blocks */
	loop_info = get_loop_info(loop);
	edge      = loop_info->entry_edges;
	if (edge != NULL) {
		/* start the block search at any entry edge's target */
		some_block = edge->block;
	} else {
		/* no entry edges: this must be the outermost "loop" covering
		 * the whole graph, so start at the start block */
		assert(loop == get_irg_loop(current_ir_graph));
		some_block = get_irg_start_block(current_ir_graph);
	}

	loop_blocks  = NEW_ARR_F(block_or_loop_t,0);
	current_loop = loop;

	ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);
	inc_irg_block_visited(current_ir_graph);
	find_blocks(some_block);
	/* for endless loops the end-block might be unreachable */
	if (loop == get_irg_loop(current_ir_graph)) {
		find_blocks(get_irg_end_block(current_ir_graph));
	}
	ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);

	/* debug dump of the collected postorder */
	DB((dbg, LEVEL_3, "Block List for loop %p\n", loop));
	len = ARR_LEN(loop_blocks);
	for (i = 0; i < len; ++i) {
		block_or_loop_t *block_or_loop = &loop_blocks[i];
		if (block_or_loop->is_loop) {
			DB((dbg, LEVEL_3, " L-%p", block_or_loop->v.loop));
		} else {
			DB((dbg, LEVEL_3, " B-%+F", block_or_loop->v.block));
		}
	}
	DB((dbg, LEVEL_3, "\n"));

	max_register_pressure = 0;

	/* run1: tentative phase */
	tentative_mode = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(&loop_blocks[i]);
	}

	/* run2: tentative phase - should reach fixpoint */
	tentative_mode = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(&loop_blocks[i]);
	}

#ifndef NDEBUG
	/* run3: tentative phase - check fixpoint */
	tentative_mode               = true;
	should_have_reached_fixpoint = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(&loop_blocks[i]);
	}
	should_have_reached_fixpoint = false;
#endif

	/* run4: add spills/reloads */
	tentative_mode              = false;
	do_push_unused_livethroughs = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(&loop_blocks[i]);
	}
	do_push_unused_livethroughs = false;

	/* remember the pressure so enclosing loops can query it */
	loop_info->max_register_pressure = max_register_pressure;
	DB((dbg, LEVEL_2, "Regpressure in loop %p: %u\n", loop,
				(unsigned) max_register_pressure));

	DEL_ARR_F(loop_blocks);
}
966
967 static void fix_block_borders(ir_node *block, void *data)
968 {
969         block_info_t *block_info     = get_block_info(block);
970         worklist_t   *start_worklist = block_info->start_worklist;
971         int           n_cfgpreds     = get_Block_n_cfgpreds(block);
972         ir_loop      *loop           = get_irn_loop(block);
973         int           i;
974
975         (void) data;
976         assert(start_worklist != NULL);
977
978         for (i = 0; i < n_cfgpreds; ++i) {
979                 ir_node      *pred_block      = get_Block_cfgpred_block(block, i);
980                 block_info_t *pred_block_info = get_block_info(pred_block);
981                 worklist_t   *end_worklist    = pred_block_info->end_worklist;
982                 ir_loop      *pred_loop       = get_irn_loop(pred_block);
983                 bool          is_loop_entry   = false;
984                 struct list_head *entry;
985
986                 assert(end_worklist != NULL);
987
988                 if (get_loop_depth(pred_loop) < get_loop_depth(loop)) {
989                         is_loop_entry = true;
990                 }
991
992                 /* reload missing values */
993                 activate_worklist(end_worklist);
994
995                 list_for_each(entry, &start_worklist->live_values) {
996                         worklist_entry_t *wl_entry
997                                 = list_entry(entry, worklist_entry_t, head);
998                         ir_node          *value = wl_entry->value;
999
1000                         if (is_Phi(value) && get_nodes_block(value) == block) {
1001                                 value = get_irn_n(value, i);
1002
1003                                 /* we might have unknowns as argument for the phi */
1004                                 if (!arch_irn_consider_in_reg_alloc(cls, value))
1005                                         continue;
1006                         }
1007
1008                         if (worklist_contains(value))
1009                                 continue;
1010                         if (wl_entry->unused_livethrough_loop != NULL && !is_loop_entry)
1011                                 continue;
1012
1013                         be_add_reload_on_edge(senv, value, block, i, cls, 1);
1014                 }
1015
1016                 deactivate_worklist(end_worklist);
1017         }
1018 }
1019
/**
 * Main entry point of the belady3 spiller: insert spills/reloads for
 * register class @p ncls in the graph of @p birg.
 *
 * @param birg  the be-graph to spill in
 * @param ncls  the register class to spill for
 */
static void be_spill_belady3(be_irg_t *birg, const arch_register_class_t *ncls)
{
	ir_graph *irg = be_get_birg_irg(birg);

	cls    = ncls;
	/* registers reserved as "ignore" are not available for allocation */
	n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);

	/* shortcut for register classes with ignore regs only */
	if (n_regs == 0)
		return;

	worklist_visited = 0;
	exec_freq        = be_get_birg_exec_freq(birg);

	be_clear_links(irg);
	ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
			| IR_RESOURCE_LOOP_LINK);
	inc_irg_visited(irg);

	obstack_init(&obst);
	senv = be_new_spill_env(birg);

	/* build loop-entry/exit edge information for the whole graph */
	assure_cf_loop(irg);
	clear_loop_info(get_irg_loop(irg));
	irg_block_walk_graph(irg, construct_loop_edges, NULL, NULL);

	/* process the outermost "loop", which covers the whole graph.
	 * NOTE(review): this uses current_ir_graph while the rest of the
	 * function uses irg — presumably identical here; verify */
	process_loop(get_irg_loop(current_ir_graph));

	/* insert reloads on block borders where values are missing */
	irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);

	ir_free_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
			| IR_RESOURCE_LOOP_LINK);

	/* materialize the spills/reloads collected in senv */
	be_insert_spills_reloads(senv);

	obstack_free(&obst, NULL);

	/* clean up */
	be_delete_spill_env(senv);
}
1060
/**
 * Module initializer: registers this spiller under the name "belady3" and
 * sets up its debug module.
 */
void be_init_spillbelady3(void)
{
	static be_spiller_t belady3_spiller = {
		be_spill_belady3
	};

	be_register_spiller("belady3", &belady3_spiller);
	FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady3");
}

/* run be_init_spillbelady3() automatically at module-constructor time */
BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady3);