adjustments
[libfirm] / ir / be / bespillbelady3.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       MIN-algorithm with some twists (aka belady spiller v3)
23  * @author      Matthias Braun
24  * @date        15.01.2008
25  * @version     $Id$
26  *
27  * TODO:
28  *   - handle phis correctly, decide whether we should spill them ~ok
29  *   - merge multiple start worksets of blocks ~ok
30  *   - how to and when to do the tentative phase...
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include <stdbool.h>
37
38 #include "debug.h"
39 #include "list.h"
40 #include "pdeq.h"
41
42 #include "irnode_t.h"
43 #include "irprintf.h"
44 #include "iredges_t.h"
45 #include "irloop_t.h"
46 #include "irgwalk.h"
47 #include "execfreq.h"
48
49 #include "bemodule.h"
50 #include "bespill.h"
51 #include "beutil.h"
52 #include "bespilloptions.h"
53 #include "besched_t.h"
54 #include "be_t.h"
55
56 #define EXPENSIVE_CHECKS
57
58 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
59
typedef struct loop_edge_t loop_edge_t;
/** A control flow edge crossing a loop border (an entry or exit edge). */
struct loop_edge_t {
	ir_node     *block; /**< the block the edge points to */
	int          pos;   /**< predecessor position of the edge in block */
	loop_edge_t *next;  /**< next edge in the singly linked list */
};

typedef struct loop_info_t loop_info_t;
/** Additional information collected per loop. */
struct loop_info_t {
	ir_loop     *loop;                  /**< the loop this info belongs to */
	loop_edge_t *entry_edges;           /**< edges entering the loop */
	loop_edge_t *exit_edges;            /**< edges leaving the loop */
	size_t       max_register_pressure; /**< highest pressure seen inside the loop */
};

typedef struct worklist_entry_t worklist_entry_t;
/** One live value tracked in a worklist. */
struct worklist_entry_t {
	struct list_head  head;         /**< list handle */
	ir_node          *value;        /**< the live value */
	ir_node          *reload_point; /**< schedule point where a reload would be placed */
	ir_loop          *unused_livethrough_loop;
};

typedef struct worklist_t worklist_t;
/** A worklist: the set of values currently kept in registers. */
struct worklist_t {
	struct list_head  live_values;   /**< list of worklist_entry_t */
	size_t            n_live_values; /**< number of entries in live_values */
	unsigned long     visited;       /**< timestamp, compared against worklist_visited */
};

typedef struct block_info_t block_info_t;
/** Worklists computed at the borders of a basic block. */
struct block_info_t {
	worklist_t *start_worklist; /**< worklist at the beginning of the block */
	worklist_t *end_worklist;   /**< worklist at the end of the block */
};

/* global state of the spiller pass */
static const arch_env_t            *arch_env;
static const arch_register_class_t *cls;       /* register class being processed */
static struct obstack               obst;      /* memory pool for all allocations */
static spill_env_t                 *senv;      /* collects spills/reloads to insert */
static size_t                       n_regs;    /* number of available registers */
static size_t                       max_register_pressure;
static bool                         tentative_mode;
static bool                         should_have_reached_fixpoint;
static bool                         do_push_unused_livethroughs;
static ir_exec_freq                *exec_freq;
static unsigned long                worklist_visited; /* timestamp counter */
108 static worklist_t *new_worklist(void)
109 {
110         worklist_t *worklist = obstack_alloc(&obst, sizeof(worklist[0]));
111         memset(worklist, 0, sizeof(worklist[0]));
112
113         INIT_LIST_HEAD(&worklist->live_values);
114         worklist->n_live_values    = 0;
115
116         return worklist;
117 }
118
/**
 * Marks a node as not visited by setting its visited counter one below the
 * graph's current visited counter.
 */
static void mark_irn_not_visited(ir_node *node)
{
	set_irn_visited(node, get_irg_visited(current_ir_graph) - 1);
}
123
/**
 * Returns true if the value is part of the currently active worklist
 * (the node's visited flag doubles as the membership flag).
 */
static bool worklist_contains(const ir_node *node)
{
	return irn_visited(node);
}
128
129 static block_info_t *get_block_info(ir_node *block)
130 {
131         block_info_t *info = get_irn_link(block);
132         if (info != NULL)
133                 return info;
134
135         info = obstack_alloc(&obst, sizeof(info[0]));
136         memset(info, 0, sizeof(info[0]));
137         set_irn_link(block, info);
138         return info;
139 }
140
141 static void deactivate_worklist(const worklist_t *worklist)
142 {
143         struct list_head *entry;
144
145         list_for_each(entry, &worklist->live_values) {
146                 worklist_entry_t *wl_entry
147                         = list_entry(entry, worklist_entry_t, head);
148                 assert(worklist_contains(wl_entry->value));
149                 mark_irn_not_visited(wl_entry->value);
150                 set_irn_link(wl_entry->value, NULL);
151         }
152 }
153
154 static void activate_worklist(const worklist_t *worklist)
155 {
156         struct list_head *entry;
157
158         list_for_each(entry, &worklist->live_values) {
159                 worklist_entry_t *wl_entry
160                         = list_entry(entry, worklist_entry_t, head);
161                 assert(!worklist_contains(wl_entry->value));
162                 mark_irn_visited(wl_entry->value);
163                 set_irn_link(wl_entry->value, wl_entry);
164         }
165 }
166
167 /**
168  * duplicate a worklist and directly activates it
169  */
170 static void fill_and_activate_worklist(worklist_t *new_worklist,
171                 const worklist_t *worklist, ir_node *block, ir_node *succ_block,
172                 int succ_pos)
173 {
174         ir_node          *reload_point  = NULL;
175         size_t            n_live_values = 0;
176         struct list_head *entry;
177
178         if (succ_block != NULL &&
179                         (get_Block_n_cfgpreds(succ_block) > 1
180                          || get_irn_n_edges_kind(block, EDGE_KIND_BLOCK) > 1)) {
181                 reload_point = be_get_end_of_block_insertion_point(block);
182         }
183
184         list_for_each(entry, &worklist->live_values) {
185                 worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
186                 ir_node          *value     = wl_entry->value;
187                 worklist_entry_t *new_entry;
188
189                 if (new_worklist->n_live_values >= n_regs)
190                         break;
191
192                 if (is_Phi(value) && get_nodes_block(value) == succ_block) {
193                         value = get_Phi_pred(value, succ_pos);
194
195                         /* can happen for unknown phi preds */
196                         if (!arch_irn_consider_in_reg_alloc(arch_env, cls, value))
197                                 continue;
198                 }
199
200                 if (irn_visited(value))
201                         continue;
202
203                 new_entry = obstack_alloc(&obst, sizeof(new_entry[0]));
204                 memset(new_entry, 0, sizeof(new_entry[0]));
205
206                 new_entry->value = value;
207                 if (reload_point != NULL) {
208                         new_entry->reload_point = reload_point;
209                 } else {
210                         new_entry->reload_point = wl_entry->reload_point;
211                 }
212
213                 list_add_tail(&new_entry->head, &new_worklist->live_values);
214                 ++n_live_values;
215
216                 mark_irn_visited(value);
217                 set_irn_link(value, new_entry);
218                 new_worklist->n_live_values++;
219         }
220 }
221
222 static worklist_t *duplicate_worklist(const worklist_t *worklist)
223 {
224         worklist_t       *new_worklist;
225         struct list_head *entry;
226
227         new_worklist = obstack_alloc(&obst, sizeof(new_worklist[0]));
228         memset(new_worklist, 0, sizeof(new_worklist[0]));
229         INIT_LIST_HEAD(&new_worklist->live_values);
230         new_worklist->n_live_values = worklist->n_live_values;
231
232         list_for_each(entry, &worklist->live_values) {
233                 worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
234                 worklist_entry_t *new_entry
235                         = obstack_alloc(&obst, sizeof(new_entry[0]));
236
237                 memcpy(new_entry, wl_entry, sizeof(new_entry[0]));
238                 list_add_tail(&new_entry->head, &new_worklist->live_values);
239         }
240
241         return new_worklist;
242 }
243
#ifdef DEBUG_libfirm
/**
 * Prints the number and names of the values in a worklist to the debug
 * output at the given debug level.
 */
static void print_worklist(const worklist_t *worklist, int level)
{
	struct list_head *entry;

	/* n_live_values is a size_t; the old "%d" format did not match it.
	 * Cast to unsigned for a portable conversion. */
	DB((dbg, level, "%u values: ", (unsigned)worklist->n_live_values));
	list_for_each(entry, &worklist->live_values) {
		worklist_entry_t *wl_entry
			= list_entry(entry, worklist_entry_t, head);

		DB((dbg, level, "%+F ", wl_entry->value));
	}
}
#endif
258
259
260
261 static void clear_loop_info(ir_loop *loop)
262 {
263         int n_elements = get_loop_n_elements(loop);
264         int i;
265
266         loop->link = NULL;
267         for (i = 0; i < n_elements; ++i) {
268                 loop_element element = get_loop_element(loop, i);
269                 if (*element.kind != k_ir_loop)
270                         continue;
271
272                 clear_loop_info(element.son);
273         }
274 }
275
276 static loop_info_t *get_loop_info(ir_loop *loop)
277 {
278         loop_info_t *info = loop->link;
279         if (info != NULL)
280                 return info;
281
282         info = obstack_alloc(&obst, sizeof(info[0]));
283         memset(info, 0, sizeof(info[0]));
284         info->loop = loop;
285         loop->link = info;
286         return info;
287 }
288
/**
 * Constructs loop entry/exit edges when called in a block walker.
 * For every control flow edge that crosses one or more loop borders the edge
 * is recorded in the loop_info of every loop level it crosses.
 */
static void construct_loop_edges(ir_node *block, void *data)
{
	int      n_cfgpreds = get_Block_n_cfgpreds(block);
	ir_loop *loop       = get_irn_loop(block);
	int      i;

	(void) data;

	for (i = 0; i < n_cfgpreds; ++i) {
		ir_node     *cfgpred_block = get_Block_cfgpred_block(block, i);
		ir_loop     *cfgpred_loop  = get_irn_loop(cfgpred_block);
		loop_edge_t *edge;
		bool        is_exit_edge;
		ir_loop     *l, *goal;

		/* predecessor in the same loop: not a border edge */
		if (cfgpred_loop == loop)
			continue;

		/* critical edges are split, so we can't jump from 1 loop
		 * directly into another without going out of it */
		assert(get_loop_depth(cfgpred_loop) != get_loop_depth(loop));

		/* edge out of loop: walk from the deeper (source) loop outwards
		 * until the target loop is reached; otherwise it is an entry edge
		 * and we walk from the deeper (target) loop outwards */
		if (get_loop_depth(cfgpred_loop) > get_loop_depth(loop)) {
			is_exit_edge = true;
			l            = cfgpred_loop;
			goal         = loop;
		} else {
			is_exit_edge = false;
			l            = loop;
			goal         = cfgpred_loop;
		}

		/* this might be a jump out of/into multiple loops: record the
		 * edge at every loop level it crosses */
		do {
			loop_info_t *l_info = get_loop_info(l);

			edge = obstack_alloc(&obst, sizeof(edge[0]));
			memset(edge, 0, sizeof(edge[0]));
			edge->block = block;
			edge->pos   = i;

			if (is_exit_edge) {
				edge->next         = l_info->exit_edges;
				l_info->exit_edges = edge;
				assert(get_loop_depth(loop) < get_loop_depth(l));
			} else {
				edge->next          = l_info->entry_edges;
				l_info->entry_edges = edge;
				assert(get_loop_depth(cfgpred_loop) < get_loop_depth(l));
			}
			l = get_loop_outer_loop(l);
		} while (l != goal);
	}
}
347
348
349
350
351 static void place_reload(worklist_entry_t *entry)
352 {
353         if (tentative_mode)
354                 return;
355
356         DB((dbg, LEVEL_1, "reload of %+F before %+F\n", entry->value,
357                 entry->reload_point));
358         assert(entry->reload_point != NULL);
359         be_add_reload(senv, entry->value, entry->reload_point, cls, 1);
360         entry->reload_point = NULL;
361 }
362
363 /**
364  * makes sure the worklist contains not more than n_regs - room_needed entries
365  */
366 static void make_room(worklist_t *worklist, size_t room_needed)
367 {
368         int               i;
369         int               spills_needed;
370         struct list_head *entry;
371
372         spills_needed = worklist->n_live_values + room_needed - n_regs;
373         if (spills_needed <= 0)
374                 return;
375
376         entry = worklist->live_values.next;
377         for(i = spills_needed; i > 0; --i) {
378                 struct list_head *next = entry->next;
379                 worklist_entry_t *wl_entry
380                         = list_entry(entry, worklist_entry_t, head);
381
382                 assert(worklist_contains(wl_entry->value));
383                 mark_irn_not_visited(wl_entry->value);
384                 place_reload(wl_entry);
385                 list_del(entry);
386
387                 entry = next;
388         }
389         worklist->n_live_values -= spills_needed;
390 }
391
/**
 * A value was used, so bring it to the back of the worklist (which might
 * result in a spill of another value).
 *
 * @param worklist     the current worklist
 * @param value        the used value
 * @param sched_point  the node before which a reload would be inserted
 */
static void val_used(worklist_t *worklist, ir_node *value, ir_node *sched_point)
{
	/* already in the worklist? move around, otherwise add at back */
	worklist_entry_t *entry = get_irn_link(value);

	assert(arch_irn_consider_in_reg_alloc(arch_env, cls, value));

	if (worklist_contains(value)) {
		assert(entry != NULL);

		/* unlink so it can be re-appended at the most-recently-used end */
		list_del(&entry->head);
	} else {
		/* first use: create an entry (entries are cached in the node's
		 * link field, so a previously evicted entry may be reused) */
		if (entry == NULL) {
			entry        = obstack_alloc(&obst, sizeof(entry[0]));
			memset(entry, 0, sizeof(entry[0]));

			entry->value = value;
			set_irn_link(value, entry);
		}

		++worklist->n_live_values;
		mark_irn_visited(value);
	}

	/* remember where a reload would have to be placed */
	entry->reload_point = sched_point;
	list_add_tail(&entry->head, &worklist->live_values);
}
423
424 static void worklist_remove(worklist_t *worklist, ir_node *value)
425 {
426         worklist_entry_t *entry = get_irn_link(value);
427         assert(entry != NULL);
428         list_del(&entry->head);
429         --worklist->n_live_values;
430
431         assert(worklist_contains(value));
432         mark_irn_not_visited(value);
433 }
434
435 static void update_max_pressure(worklist_t *worklist)
436 {
437         if (worklist->n_live_values > max_register_pressure)
438                 max_register_pressure = worklist->n_live_values;
439 }
440
/**
 * Walks the schedule of @p block backwards, maintaining the set of live
 * values in @p worklist and placing spills/reloads where the set would
 * exceed the number of registers (the MIN/belady decision).
 */
static void do_spilling(ir_node *block, worklist_t *worklist)
{
	ir_node *node;

	assert(worklist != NULL);

	sched_foreach_reverse(block, node) {
		int    i, arity;
		size_t n_defs = 0;

		DB((dbg, LEVEL_2, "\t%+F... ", node));
		update_max_pressure(worklist);

		if (is_Phi(node)) {
			ir_node *node2;
			/* TODO: if we have some free registers, then we could decide to
			 * not spill some phis (but not for phis where at least 1 input is
			 * themselves) */

			/* we have to spill all phis that are not live; phis are at the
			 * start of the schedule, so this ends the backwards walk */
			sched_foreach_reverse_from(node, node2) {
				assert(is_Phi(node2));

				if (worklist_contains(node2))
					continue;
				if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node2))
					continue;

				if (!tentative_mode)
					be_spill_phi(senv, node2);
			}
			DB((dbg, LEVEL_2, "\n"));
			break;
		}

		/* remove values defined by this instruction from the workset. Values
		 * defined but not in the workset need free registers */
		if (get_irn_mode(node) == mode_T) {
			/* mode_T: the real results are the Proj users */
			const ir_edge_t *edge;

			foreach_out_edge(node, edge) {
				ir_node *proj = get_edge_src_irn(edge);
				if (!arch_irn_consider_in_reg_alloc(arch_env, cls, proj))
					continue;
				if (worklist_contains(proj)) {
					worklist_remove(worklist, proj);
				} else {
					++n_defs;
				}
			}
		} else if (arch_irn_consider_in_reg_alloc(arch_env, cls, node)) {
			if (worklist_contains(node)) {
				worklist_remove(worklist, node);
			} else {
				n_defs = 1;
			}
		}

		/* make sure we have enough free registers for the spills */
		make_room(worklist, n_defs);

		/* put all values used by the instruction into the workset */
		arity = get_irn_arity(node);
		for (i = 0; i < arity; ++i) {
			ir_node *use = get_irn_n(node, i);

			if (!arch_irn_consider_in_reg_alloc(arch_env, cls, use))
				continue;

			val_used(worklist, use, node);
		}

		/* we might have too many values in the worklist now and need to spill
		 * some */
		make_room(worklist, 0);

#ifdef DEBUG_libfirm
		print_worklist(worklist, LEVEL_2);
		DB((dbg, LEVEL_2, "\n"));
#endif
	}

	update_max_pressure(worklist);

#ifdef DEBUG_libfirm
	DB((dbg, LEVEL_1, "worklist at begin of %+F:", block));
	print_worklist(worklist, LEVEL_1);
	DB((dbg, LEVEL_1, "\n"));
#endif
}
531
532 static bool worklists_equal(const worklist_t *wl1, const worklist_t *wl2)
533 {
534         const struct list_head *i1 = &wl1->live_values;
535         const struct list_head *i2 = &wl2->live_values;
536
537         for ( ; i1 != &wl1->live_values && i2 != &wl2->live_values;
538                         i1 = i1->next, i2 = i2->next) {
539                 worklist_entry_t *entry1 = list_entry(i1, worklist_entry_t, head);
540                 worklist_entry_t *entry2 = list_entry(i2, worklist_entry_t, head);
541
542                 if (entry1->value != entry2->value)
543                         return false;
544         }
545         /* both lists at the end */
546         if (i1 != &wl1->live_values || i2 != &wl2->live_values)
547                 return false;
548
549         return true;
550 }
551
552 static bool fill_start_worklist(worklist_t *new_worklist, ir_node *block)
553 {
554         double           best_execfreq   = -1;
555         worklist_t      *best_worklist   = NULL;
556         ir_node         *best_succ_block = NULL;
557         int              best_pos;
558         const ir_edge_t *edge;
559
560         /* construct worklist */
561         foreach_block_succ(block, edge) {
562                 ir_node *succ_block = get_edge_src_irn(edge);
563                 double   execfreq   = get_block_execfreq(exec_freq, succ_block);
564
565                 if (execfreq < best_execfreq)
566                         continue;
567
568                 block_info_t *block_info    = get_block_info(succ_block);
569                 worklist_t   *succ_worklist = block_info->start_worklist;
570
571                 if (succ_worklist == NULL || succ_worklist->visited >= worklist_visited)
572                         continue;
573
574                 best_execfreq   = execfreq;
575                 best_worklist   = succ_worklist;
576                 best_succ_block = succ_block;
577                 best_pos        = get_edge_src_pos(edge);
578         }
579
580         if (best_worklist == NULL)
581                 return false;
582         best_worklist->visited = worklist_visited;
583
584         fill_and_activate_worklist(new_worklist, best_worklist, block,
585                         best_succ_block, best_pos);
586         return true;
587 }
588
589 static worklist_t *construct_start_worklist(ir_node *block)
590 {
591         worklist_t *worklist = new_worklist();
592
593         ++worklist_visited;
594
595         while(fill_start_worklist(worklist, block)) {
596                 if (worklist->n_live_values >= n_regs)
597                         break;
598         }
599
600         return worklist;
601 }
602
/**
 * Computes the worklists at the end and the beginning of a block by running
 * the spill algorithm backwards through it — intended as a block walker.
 */
static void process_block(ir_node *block, void *env)
{
	block_info_t    *block_info;
	worklist_t      *worklist;
	int              n_preds;

	(void) env;
	DB((dbg, LEVEL_1, "Processing %+F\n", block));

	/* merge the start worklists of the successor blocks */
	worklist   = construct_start_worklist(block);
	block_info = get_block_info(block);

#ifdef DEBUG_libfirm
	DB((dbg, LEVEL_1, "worklist at end of %+F:", block));
	print_worklist(worklist, LEVEL_1);
	DB((dbg, LEVEL_1, "\n"));

	/* after the fixpoint iteration the worklists must not change anymore */
	if (should_have_reached_fixpoint) {
		assert(worklists_equal(block_info->end_worklist, worklist));
	}
#endif
	block_info->end_worklist = duplicate_worklist(worklist);

	/* simulate the block backwards; worklist is mutated into the start set */
	do_spilling(block, worklist);
	deactivate_worklist(worklist);

#ifdef DEBUG_libfirm
	if (should_have_reached_fixpoint) {
		assert(worklists_equal(block_info->start_worklist, worklist));
	}
#endif
	block_info->start_worklist = worklist;

	/* we shouldn't have any live values left at the start block */
	n_preds = get_Block_n_cfgpreds(block);
	//assert(n_preds != 0 || worklist->n_live_values == 0);
}
640
/** Either a basic block or a whole loop — one element of the processing list. */
typedef struct block_or_loop_t block_or_loop_t;
struct block_or_loop_t {
	union {
		ir_node *block;
		ir_loop *loop;
	} v;
	bool is_loop; /**< true if v.loop is valid, false if v.block is valid */
};

/* flexible array of the blocks/loops discovered for the current loop */
static block_or_loop_t *loop_blocks;
/* the loop currently being processed by find_blocks/find_in_loop */
static ir_loop         *current_loop;

static void find_blocks(ir_node *block);
654
/**
 * Appends a nested loop (as one unit) to the processing list and continues
 * the block search at the loop's exits. @p entry is the block through which
 * the loop was entered (only used for checking).
 */
static void find_in_loop(ir_loop *loop, ir_node *entry)
{
	loop_info_t     *loop_info = get_loop_info(loop);
	block_or_loop_t block_or_loop;
	loop_edge_t     *edge;

	/* simply mark 1 block in the loop to indicate that the loop was already
	 * processed */
	ir_node *some_block = loop_info->entry_edges->block;
	if (Block_block_visited(some_block))
		return;

	block_or_loop.v.loop  = loop;
	block_or_loop.is_loop = true;
	ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);

#ifndef NDEBUG
	{
		/* we should be 1 of the entry blocks */
		loop_edge_t *edge  = loop_info->entry_edges;
		bool         found = false;
		for ( ; edge != NULL; edge = edge->next) {
			if (edge->block == entry)
				found = true;
		}
		assert(found);
	}
#endif
	/* check all loop successors */
	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
		ir_node *succ      = edge->block;
		ir_loop *succ_loop = get_irn_loop(succ);

		if (succ_loop == current_loop) {
			find_blocks(succ);
		} else {
			/* the exit leads further out than the loop currently being
			 * processed; an outer walk will pick it up */
			assert(get_loop_depth(succ_loop) < get_loop_depth(current_loop));
		}
	}
}
695
696 static void find_blocks(ir_node *block)
697 {
698         const ir_edge_t *edge;
699         block_or_loop_t block_or_loop;
700
701         if (Block_block_visited(block))
702                 return;
703
704         block_or_loop.v.block = block;
705         block_or_loop.is_loop = false;
706
707         ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);
708         mark_Block_block_visited(block);
709
710         foreach_block_succ(block, edge) {
711                 ir_node *succ = get_edge_src_irn(edge);
712
713                 /* is it part of the current loop? */
714                 ir_loop *loop = get_irn_loop(succ);
715                 if (loop != current_loop) {
716                         /* a sub-loop? */
717                         if (get_loop_depth(loop) > get_loop_depth(current_loop)) {
718                                 find_in_loop(loop, succ);
719                         } else {
720                                 /* parent loop: we're not interested in the block */
721                         }
722                 } else {
723                         find_blocks(succ);
724                 }
725         }
726 }
727
728 /**
729  * append an entry to a worklist. WARNING: The entry must not already be in the
730  * worklist.
731  */
732 static void worklist_append(worklist_t *worklist, ir_node *value,
733                             ir_node *reload_point,
734                                                         ir_loop *unused_livethrough_loop)
735 {
736         worklist_entry_t *entry     = obstack_alloc(&obst, sizeof(entry[0]));
737         memset(entry, 0, sizeof(entry[0]));
738
739 #ifdef EXPENSIVE_CHECKS
740         {
741                 struct list_head *entry;
742                 list_for_each(entry, &worklist->live_values) {
743                         worklist_entry_t *wl_entry
744                                 = list_entry(entry, worklist_entry_t, head);
745                         assert(wl_entry->value != value);
746                 }
747         }
748 #endif
749
750         entry->value                   = value;
751         entry->reload_point            = reload_point;
752         entry->unused_livethrough_loop = unused_livethrough_loop;
753         list_add_tail(&entry->head, &worklist->live_values);
754         ++worklist->n_live_values;
755         assert(worklist->n_live_values <= n_regs);
756 }
757
/**
 * Adds @p value as an unused livethrough to the worklists of all entry and
 * exit borders of the loop described by @p loop_info, so reloads are placed
 * outside the loop.
 */
static void push_unused_livethrough(loop_info_t *loop_info, ir_node *value)
{
	/* bump the timestamp so each worklist is touched at most once */
	++worklist_visited;

	/* add the value to all loop exit and entry blocks */
	loop_edge_t *edge = loop_info->exit_edges;
	for ( ; edge != NULL; edge = edge->next) {
		ir_node            *block
			= get_Block_cfgpred_block(edge->block, edge->pos);
		const block_info_t *info     = get_block_info(block);
		worklist_t         *worklist = info->end_worklist;

		if (worklist->visited >= worklist_visited)
			continue;
		worklist->visited = worklist_visited;

		/* TODO: we need a smarter mechanism here, that makes the reloader place
		 * reload nodes on all loop exits... */
		ir_node *reload_point = NULL;

		worklist_append(worklist, value, reload_point, loop_info->loop);
	}
	edge = loop_info->entry_edges;
	for ( ; edge != NULL; edge = edge->next) {
		ir_node            *entry_block = edge->block;
		const block_info_t *info        = get_block_info(entry_block);
		worklist_t         *worklist    = info->start_worklist;

		if (worklist->visited >= worklist_visited)
			continue;
		worklist->visited = worklist_visited;

		/* reloads happen at the end of the predecessor, i.e. before
		 * entering the loop */
		ir_node *pred_block
			= get_Block_cfgpred_block(entry_block, edge->pos);
		ir_node *reload_point = be_get_end_of_block_insertion_point(pred_block);

		worklist_append(worklist, value, reload_point, loop_info->loop);
	}

	set_irn_link(value, NULL);
	/* the value now occupies a register throughout the loop */
	++loop_info->max_register_pressure;
}
800
/**
 * Finds values that live through the loop without being used inside it and
 * pushes them across the loop borders, as long as free registers remain.
 */
static void push_unused_livethroughs(loop_info_t *loop_info)
{
	loop_edge_t *edge;

	/* we can only push unused livethroughs if register pressure inside the loop
	 * was low enough */
	if (loop_info->max_register_pressure >= n_regs)
		return;

	/* find unused livethroughs: register pressure in the loop was low enough
	 * which means that we had no spills, which implies that values live at
	 * the loop borders stayed in registers throughout the loop */
	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
		ir_node            *block          = edge->block;
		const block_info_t *info           = get_block_info(block);
		worklist_t         *start_worklist = info->start_worklist;
		ir_node            *exit_block;
		const block_info_t *exit_info;
		worklist_t         *end_worklist;
		struct list_head   *entry;

		if (start_worklist == NULL)
			continue;

		exit_block   = get_Block_cfgpred_block(edge->block, edge->pos);
		exit_info    = get_block_info(exit_block);
		end_worklist = exit_info->end_worklist;

		/* mark everything live at the end of the block inside the loop */
		activate_worklist(end_worklist);
		/* all values contained in the start_worklist, which are not available
		 * in the end_worklist, must be unused livethroughs */

		list_for_each(entry, &start_worklist->live_values) {
			worklist_entry_t *wl_entry
				= list_entry(entry, worklist_entry_t, head);
			ir_node *value = wl_entry->value;

			/* stop once the registers inside the loop are exhausted */
			if (loop_info->max_register_pressure >= n_regs)
				break;


			if (!worklist_contains(value)) {
				/* add it to all loop exits */
				DB((dbg, LEVEL_2, "Found unused livethrough: %+F (regpressure in loop %d)\n", value, loop_info->max_register_pressure));
				push_unused_livethrough(loop_info, value);
				/* the value should now be part of end_worklist, so mark it */
				mark_irn_visited(value);
			}
		}

		deactivate_worklist(end_worklist);
	}
}
854
855 static void process_block_or_loop(const block_or_loop_t *block_or_loop)
856 {
857         if (block_or_loop->is_loop) {
858                 loop_info_t *loop_info = get_loop_info(block_or_loop->v.loop);
859
860                 if (do_push_unused_livethroughs)
861                         push_unused_livethroughs(loop_info);
862
863                 if (loop_info->max_register_pressure > max_register_pressure)
864                         max_register_pressure = loop_info->max_register_pressure;
865
866                 return;
867         }
868         process_block(block_or_loop->v.block, NULL);
869 }
870
/**
 * Run the spilling passes over one loop (and, recursively, all of its
 * sub-loops first).  The blocks of the loop are collected in reverse
 * postorder and then processed several times:
 * two tentative runs to let the start/end worklists converge, a debug-only
 * run that asserts the fixpoint was reached, and a final run that actually
 * inserts spills/reloads and pushes unused livethrough values.
 * The maximum register pressure observed is stored in the loop's info.
 */
static void process_loop(ir_loop *loop)
{
	int         n_elements = get_loop_n_elements(loop);
	int         i, len;
	loop_info_t *loop_info;
	loop_edge_t *edge;
	ir_node     *some_block;

	/* first handle all sub-loops */
	for (i = 0; i < n_elements; ++i) {
		loop_element element = get_loop_element(loop, i);
		if (*element.kind != k_ir_loop)
			continue;

		process_loop(element.son);
	}

	/* create a postorder of the blocks */
	loop_info = get_loop_info(loop);
	edge      = loop_info->entry_edges;
	if (edge != NULL) {
		/* start the block walk at any block the loop is entered from */
		some_block = edge->block;
	} else {
		/* only the outermost (graph) "loop" has no entry edges */
		assert(loop == get_irg_loop(current_ir_graph));
		some_block = get_irg_start_block(current_ir_graph);
	}

	loop_blocks  = NEW_ARR_F(block_or_loop_t,0);
	current_loop = loop;

	ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);
	inc_irg_block_visited(current_ir_graph);
	find_blocks(some_block);
	/* for endless loops the end-block might be unreachable */
	if (loop == get_irg_loop(current_ir_graph)) {
		find_blocks(get_irg_end_block(current_ir_graph));
	}
	ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);

	DB((dbg, LEVEL_3, "Block List for loop %p\n", loop));
	len = ARR_LEN(loop_blocks);
	for (i = 0; i < len; ++i) {
		block_or_loop_t *block_or_loop = &loop_blocks[i];
		if (block_or_loop->is_loop) {
			DB((dbg, LEVEL_3, " L-%p", block_or_loop->v.loop));
		} else {
			DB((dbg, LEVEL_3, " B-%+F", block_or_loop->v.block));
		}
	}
	DB((dbg, LEVEL_3, "\n"));

	max_register_pressure = 0;

	/* run1: tentative phase */
	tentative_mode = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(&loop_blocks[i]);
	}

	/* run2: tentative phase - should reach fixpoint */
	tentative_mode = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(&loop_blocks[i]);
	}

#ifndef NDEBUG
	/* run3: tentative phase - check fixpoint */
	tentative_mode               = true;
	should_have_reached_fixpoint = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(&loop_blocks[i]);
	}
	should_have_reached_fixpoint = false;
#endif

	/* run4: add spills/reloads */
	tentative_mode              = false;
	do_push_unused_livethroughs = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(&loop_blocks[i]);
	}
	do_push_unused_livethroughs = false;

	/* remember the pressure so enclosing loops can query it */
	loop_info->max_register_pressure = max_register_pressure;
	DB((dbg, LEVEL_2, "Regpressure in loop %p: %u\n", loop,
				(unsigned) max_register_pressure));

	DEL_ARR_F(loop_blocks);
}
960
961 static void fix_block_borders(ir_node *block, void *data)
962 {
963         block_info_t *block_info     = get_block_info(block);
964         worklist_t   *start_worklist = block_info->start_worklist;
965         int           n_cfgpreds     = get_Block_n_cfgpreds(block);
966         ir_loop      *loop           = get_irn_loop(block);
967         int           i;
968
969         (void) data;
970         assert(start_worklist != NULL);
971
972         for (i = 0; i < n_cfgpreds; ++i) {
973                 ir_node      *pred_block      = get_Block_cfgpred_block(block, i);
974                 block_info_t *pred_block_info = get_block_info(pred_block);
975                 worklist_t   *end_worklist    = pred_block_info->end_worklist;
976                 ir_loop      *pred_loop       = get_irn_loop(pred_block);
977                 bool          is_loop_entry   = false;
978                 struct list_head *entry;
979
980                 assert(end_worklist != NULL);
981
982                 if (get_loop_depth(pred_loop) < get_loop_depth(loop)) {
983                         is_loop_entry = true;
984                 }
985
986                 /* reload missing values */
987                 activate_worklist(end_worklist);
988
989                 list_for_each(entry, &start_worklist->live_values) {
990                         worklist_entry_t *wl_entry
991                                 = list_entry(entry, worklist_entry_t, head);
992                         ir_node          *value = wl_entry->value;
993
994                         if (is_Phi(value) && get_nodes_block(value) == block) {
995                                 value = get_irn_n(value, i);
996
997                                 /* we might have unknowns as argument for the phi */
998                                 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, value))
999                                         continue;
1000                         }
1001
1002                         if (worklist_contains(value))
1003                                 continue;
1004                         if (wl_entry->unused_livethrough_loop != NULL && !is_loop_entry)
1005                                 continue;
1006
1007                         be_add_reload_on_edge(senv, value, block, i, cls, 1);
1008                 }
1009
1010                 deactivate_worklist(end_worklist);
1011         }
1012 }
1013
/**
 * Main entry point of the belady3 spiller: spill values of register class
 * @p ncls in the graph of @p birg so that register pressure never exceeds
 * the number of available (non-ignore) registers.
 */
static void be_spill_belady3(be_irg_t *birg, const arch_register_class_t *ncls)
{
	ir_graph *irg = be_get_birg_irg(birg);

	cls    = ncls;
	n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);

	/* shortcut for register classes with ignore regs only */
	if (n_regs == 0)
		return;

	worklist_visited = 0;
	arch_env         = be_get_birg_arch_env(birg);
	exec_freq        = be_get_birg_exec_freq(birg);

	be_clear_links(irg);
	ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
			| IR_RESOURCE_LOOP_LINK);
	inc_irg_visited(irg);

	obstack_init(&obst);
	senv = be_new_spill_env(birg);

	assure_cf_loop(irg);
	clear_loop_info(get_irg_loop(irg));
	irg_block_walk_graph(irg, construct_loop_edges, NULL, NULL);

	/* NOTE(review): this uses current_ir_graph while the rest of the
	 * function uses the local irg — presumably they are the same graph
	 * here (process_loop also relies on current_ir_graph internally);
	 * verify before changing either side. */
	process_loop(get_irg_loop(current_ir_graph));

	/* insert reloads on block borders where values are missing */
	irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);

	ir_free_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
			| IR_RESOURCE_LOOP_LINK);

	/* materialize the spills/reloads collected in the spill environment */
	be_insert_spills_reloads(senv);

	obstack_free(&obst, NULL);

	/* clean up */
	be_delete_spill_env(senv);
}
1055
1056 void be_init_spillbelady3(void)
1057 {
1058         static be_spiller_t belady3_spiller = {
1059                 be_spill_belady3
1060         };
1061
1062         be_register_spiller("belady3", &belady3_spiller);
1063         FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady3");
1064 }
1065
/* register be_init_spillbelady3 to run during backend module initialization */
BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady3);