/*
 * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief       MIN-algorithm with some twists (aka belady spiller v3)
 * @author      Matthias Braun
 * @date        15.01.2008
 * @version     $Id$
 *
 * TODO:
 *   - handle phis correctly, decide whether we should spill them ~ok
 *   - merge multiple start worksets of blocks ~ok
 *   - how and when to do the tentative phase...
 */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdbool.h>

#include "debug.h"
#include "list.h"
#include "pdeq.h"

#include "irnode_t.h"
#include "irprintf.h"
#include "iredges_t.h"
#include "irloop_t.h"
#include "irgwalk.h"
#include "execfreq.h"

#include "bemodule.h"
#include "bespill.h"
#include "beutil.h"
#include "bespilloptions.h"
#include "besched_t.h"
#include "be_t.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

typedef struct loop_edge_t loop_edge_t;
struct loop_edge_t {
	ir_node     *block;
	int          pos;
	loop_edge_t *next;
};

typedef struct loop_info_t loop_info_t;
struct loop_info_t {
	loop_edge_t *entry_edges;
	loop_edge_t *exit_edges;
	size_t       max_register_pressure;
};

typedef struct worklist_entry_t worklist_entry_t;
struct worklist_entry_t {
	struct list_head  head;
	ir_node          *value;
	ir_node          *reload_point;
	worklist_entry_t *next_reload;
};

typedef struct worklist_t worklist_t;
struct worklist_t {
	struct list_head  live_values;
	size_t            n_live_values;
};

typedef struct block_info_t block_info_t;
struct block_info_t {
	worklist_t *start_worklist;
	worklist_t *end_worklist;
};

static const arch_env_t            *arch_env;
static const arch_register_class_t *cls;
static struct obstack               obst;
static spill_env_t                 *senv;
static size_t                       n_regs;
static size_t                       max_register_pressure;
static bool                         tentative_mode;
static bool                         should_have_reached_fixpoint;
static ir_exec_freq                *exec_freq;

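/**
 * allocates a new, empty worklist on the pass obstack
 */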
static worklist_t *new_worklist(void)
{
	worklist_t *worklist = obstack_alloc(&obst, sizeof(worklist[0]));

	INIT_LIST_HEAD(&worklist->live_values);
	worklist->n_live_values = 0;

	return worklist;
}

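/**
 * clears the visited flag of a node by setting its visited counter to one
 * less than the current graph visited counter
 */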
static void mark_irn_not_visited(ir_node *node)
{
	set_irn_visited(node, get_irg_visited(current_ir_graph) - 1);
}

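/**
 * the visited flag of a value doubles as its "member of the currently
 * active worklist" marker
 */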
static bool worklist_contains(const ir_node *node)
{
	return irn_visited(node);
}

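/**
 * returns the block_info_t stored in the block's link field, lazily
 * allocating and zero-initializing it on first use
 */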
static block_info_t *get_block_info(ir_node *block)
{
	block_info_t *info = get_irn_link(block);
	if (info != NULL)
		return info;

	info = obstack_alloc(&obst, sizeof(info[0]));
	memset(info, 0, sizeof(info[0]));
	set_irn_link(block, info);
	return info;
}

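/**
 * deactivates a worklist: clears the visited flag and link field of all
 * its values
 */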
static void deactivate_worklist(const worklist_t *worklist)
{
	struct list_head *entry;

	list_for_each(entry, &worklist->live_values) {
		worklist_entry_t *wl_entry
			= list_entry(entry, worklist_entry_t, head);
		assert(worklist_contains(wl_entry->value));
		mark_irn_not_visited(wl_entry->value);
		set_irn_link(wl_entry->value, NULL);
	}
}

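/**
 * activates a worklist: marks all its values as visited and links each
 * value to its worklist entry
 */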
static void activate_worklist(const worklist_t *worklist)
{
	struct list_head *entry;

	list_for_each(entry, &worklist->live_values) {
		worklist_entry_t *wl_entry
			= list_entry(entry, worklist_entry_t, head);
		assert(!worklist_contains(wl_entry->value));
		mark_irn_visited(wl_entry->value);
		set_irn_link(wl_entry->value, wl_entry);
	}
}

/**
 * duplicates a worklist and directly activates it; phis of succ_block are
 * translated to their predecessor value on the given edge
 */
static worklist_t *duplicate_activate_worklist(const worklist_t *worklist,
		ir_node *block, ir_node *succ_block, int succ_pos)
{
	ir_node          *reload_point  = NULL;
	size_t            n_live_values = 0;
	worklist_t       *new_worklist;
	struct list_head *entry;

	if (succ_block != NULL &&
			(get_Block_n_cfgpreds(succ_block) > 1
			 || get_irn_n_edges_kind(block, EDGE_KIND_BLOCK) > 1)) {
		reload_point = be_get_end_of_block_insertion_point(block);
	}

	new_worklist = obstack_alloc(&obst, sizeof(new_worklist[0]));
	INIT_LIST_HEAD(&new_worklist->live_values);

	list_for_each(entry, &worklist->live_values) {
		worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
		ir_node          *value     = wl_entry->value;
		worklist_entry_t *new_entry;

		if (is_Phi(value) && get_nodes_block(value) == succ_block) {
			value = get_Phi_pred(value, succ_pos);

			/* can happen for unknown phi preds */
			if (!arch_irn_consider_in_reg_alloc(arch_env, cls, value))
				continue;
		}

		new_entry = obstack_alloc(&obst, sizeof(new_entry[0]));
		new_entry->value = value;
		if (reload_point != NULL) {
			new_entry->reload_point = reload_point;
		} else {
			new_entry->reload_point = wl_entry->reload_point;
		}
		new_entry->next_reload = NULL;

		if (irn_not_visited(value)) {
			list_add_tail(&new_entry->head, &new_worklist->live_values);
			++n_live_values;

			mark_irn_visited(value);
			set_irn_link(value, new_entry);
		}
#if 0
		else {
			/* the only way we might visit a value again is when we get it
			 * as a phi predecessor */
			assert(is_Phi(wl_entry->value)
					&& get_nodes_block(wl_entry->value) == succ_block);
		}
#endif
	}
	new_worklist->n_live_values = n_live_values;

	return new_worklist;
}

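/**
 * duplicates a worklist (deep copy of all entries) without activating it
 */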
static worklist_t *duplicate_worklist(const worklist_t *worklist)
{
	worklist_t       *new_worklist;
	struct list_head *entry;

	new_worklist = obstack_alloc(&obst, sizeof(new_worklist[0]));
	INIT_LIST_HEAD(&new_worklist->live_values);
	new_worklist->n_live_values = worklist->n_live_values;

	list_for_each(entry, &worklist->live_values) {
		worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
		worklist_entry_t *new_entry
			= obstack_alloc(&obst, sizeof(new_entry[0]));

		memcpy(new_entry, wl_entry, sizeof(new_entry[0]));
		list_add_tail(&new_entry->head, &new_worklist->live_values);
	}

	return new_worklist;
}

#ifdef DEBUG_libfirm
static void print_worklist(const worklist_t *worklist, int level)
{
	struct list_head *entry;

	DB((dbg, level, "%d values: ", (int)worklist->n_live_values));
	list_for_each(entry, &worklist->live_values) {
		worklist_entry_t *wl_entry
			= list_entry(entry, worklist_entry_t, head);

		DB((dbg, level, "%+F ", wl_entry->value));
	}
}
#endif



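/**
 * recursively clears the link fields of a loop and all its sub-loops, so
 * that get_loop_info starts from a clean state
 */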
static void clear_loop_info(ir_loop *loop)
{
	int n_elements = get_loop_n_elements(loop);
	int i;

	loop->link = NULL;
	for (i = 0; i < n_elements; ++i) {
		loop_element element = get_loop_element(loop, i);
		if (*element.kind != k_ir_loop)
			continue;

		clear_loop_info(element.son);
	}
}

static loop_info_t *get_loop_info(ir_loop *loop)
{
	loop_info_t *info = loop->link;
	if (info != NULL)
		return info;

	info = obstack_alloc(&obst, sizeof(info[0]));
	memset(info, 0, sizeof(info[0]));
	loop->link = info;
	return info;
}

/**
 * constructs loop entry/exit edges; meant to be called from a block walker
 */
static void construct_loop_edges(ir_node *block, void *data)
{
	int      n_cfgpreds = get_Block_n_cfgpreds(block);
	ir_loop *loop       = get_irn_loop(block);
	int      i;

	(void) data;

	for (i = 0; i < n_cfgpreds; ++i) {
		ir_node     *cfgpred_block = get_Block_cfgpred_block(block, i);
		ir_loop     *cfgpred_loop  = get_irn_loop(cfgpred_block);
		loop_edge_t *edge;
		bool         is_exit_edge;
		ir_loop     *l, *goal;

		if (cfgpred_loop == loop)
			continue;

		/* critical edges are split, so we can't jump from one loop
		 * directly into another without going out of it */
		assert(get_loop_depth(cfgpred_loop) != get_loop_depth(loop));

		/* edge out of loop */
		if (get_loop_depth(cfgpred_loop) > get_loop_depth(loop)) {
			is_exit_edge = true;
			l            = cfgpred_loop;
			goal         = loop;
		} else {
			is_exit_edge = false;
			l            = loop;
			goal         = cfgpred_loop;
		}

		/* this might be a jump out of/into multiple loops at once */
		do {
			loop_info_t *l_info = get_loop_info(l);

			edge = obstack_alloc(&obst, sizeof(edge[0]));
			memset(edge, 0, sizeof(edge[0]));
			edge->block = block;
			edge->pos   = i;

			if (is_exit_edge) {
				ir_fprintf(stderr, "Loop %p exit edge %+F:%d\n",
				           l, block, i);
				edge->next         = l_info->exit_edges;
				l_info->exit_edges = edge;
				assert(get_loop_depth(loop) < get_loop_depth(l));
			} else {
				ir_fprintf(stderr, "Loop %p entry edge %+F:%d\n",
				           l, block, i);
				edge->next          = l_info->entry_edges;
				l_info->entry_edges = edge;
				assert(get_loop_depth(cfgpred_loop) < get_loop_depth(l));
			}
			l = get_loop_outer_loop(l);
		} while (l != goal);
	}
}




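/**
 * walks the chain of reload points recorded for a value and requests a
 * reload before each of them (no-op in tentative mode)
 */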
static void place_reload(worklist_entry_t *entry)
{
	if (tentative_mode)
		return;

	/* walk the list of reload points */
	do {
		DB((dbg, LEVEL_1, "reload of %+F before %+F\n", entry->value,
		    entry->reload_point));
		assert(entry->reload_point != NULL);
		be_add_reload(senv, entry->value, entry->reload_point, cls, 1);
		entry->reload_point = NULL;

		entry = entry->next_reload;
	} while (entry != NULL);
}

#if 0
/**
 * handles the worklist of a non-chosen successor: places reloads for all
 * values not live in the active worklist and links live values into the
 * reload chain of the current worklist
 */
static void process_non_active_worklist(const worklist_t *worklist,
                                        worklist_t *current_worklist)
{
	struct list_head *entry;

	list_for_each(entry, &worklist->live_values) {
		worklist_entry_t *wl_entry = list_entry(entry, worklist_entry_t, head);
		ir_node          *value    = wl_entry->value;

		if (!worklist_contains(value)) {
			/* if we still have room in the worklist, then we can simply take
			 * the value */
			if (current_worklist->n_live_values < n_regs) {
				worklist_entry_t *new_entry
					= obstack_alloc(&obst, sizeof(new_entry[0]));

				new_entry->value        = value;
				new_entry->reload_point = wl_entry->reload_point;
				new_entry->next_reload  = wl_entry->next_reload;
				set_irn_link(value, new_entry);
				mark_irn_visited(value);
				list_add(&new_entry->head, &current_worklist->live_values);
				++current_worklist->n_live_values;
			} else {
				/* value is not in the current worklist, place reloads */
				place_reload(wl_entry);
			}
		} else {
			/* value is in the current worklist, link it with the reload
			 * point from this path */
			worklist_entry_t *active_entry = get_irn_link(value);
			wl_entry->next_reload          = active_entry->next_reload;
			active_entry->next_reload      = wl_entry;
		}
	}
}
#endif

/**
 * makes sure the worklist contains no more than n_regs - room_needed entries
 */
static void make_room(worklist_t *worklist, size_t room_needed)
{
	int               i;
	int               spills_needed;
	struct list_head *entry;

	/* compute in signed arithmetic to avoid size_t underflow */
	spills_needed = (int)(worklist->n_live_values + room_needed) - (int)n_regs;
	if (spills_needed <= 0)
		return;

	entry = worklist->live_values.next;
	for (i = spills_needed; i > 0; --i) {
		struct list_head *next = entry->next;
		worklist_entry_t *wl_entry
			= list_entry(entry, worklist_entry_t, head);

		assert(worklist_contains(wl_entry->value));
		mark_irn_not_visited(wl_entry->value);
		place_reload(wl_entry);
		list_del(entry);

		entry = next;
	}
	worklist->n_live_values -= spills_needed;
}

/**
 * a value was used, so bring it to the back of the worklist (which might
 * result in a spill of another value).
 */
static void val_used(worklist_t *worklist, ir_node *value, ir_node *sched_point)
{
	/* already in the worklist? Then just move it to the back,
	 * otherwise add it at the back */
	worklist_entry_t *entry = get_irn_link(value);

	assert(arch_irn_consider_in_reg_alloc(arch_env, cls, value));

	if (worklist_contains(value)) {
		assert(entry != NULL);

		list_del(&entry->head);
	} else {
		if (entry == NULL) {
			entry        = obstack_alloc(&obst, sizeof(entry[0]));
			entry->value = value;
			set_irn_link(value, entry);
		}

		++worklist->n_live_values;
		mark_irn_visited(value);
	}

	entry->reload_point = sched_point;
	entry->next_reload  = NULL;
	list_add_tail(&entry->head, &worklist->live_values);
}

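/**
 * removes a value from the worklist and clears its active marker
 */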
static void worklist_remove(worklist_t *worklist, ir_node *value)
{
	worklist_entry_t *entry = get_irn_link(value);
	assert(entry != NULL);
	list_del(&entry->head);
	--worklist->n_live_values;

	assert(worklist_contains(value));
	mark_irn_not_visited(value);
}

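/**
 * records the maximum register pressure observed so far
 */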
static void update_max_pressure(worklist_t *worklist)
{
	if (worklist->n_live_values > max_register_pressure)
		max_register_pressure = worklist->n_live_values;
}

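/**
 * performs the Belady MIN pass for a single block: walks the schedule
 * bottom-up, removing defined values from the worklist, adding used values
 * and spilling/reloading whenever the worklist would exceed n_regs entries
 */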
static void do_spilling(ir_node *block, worklist_t *worklist)
{
	ir_node *node;

	assert(worklist != NULL);

	sched_foreach_reverse(block, node) {
		int    i, arity;
		size_t n_defs = 0;

		DB((dbg, LEVEL_2, "\t%+F... ", node));
		update_max_pressure(worklist);

		if (is_Phi(node)) {
			ir_node *node2;
			/* TODO: if we have some free registers, then we could decide to
			 * not spill some phis (but not for phis where at least one
			 * operand is the phi itself) */

			/* we have to spill all phis that are not live */
			sched_foreach_reverse_from(node, node2) {
				assert(is_Phi(node2));

				if (worklist_contains(node2))
					continue;
				if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node2))
					continue;

				if (!tentative_mode)
					be_spill_phi(senv, node2);
			}
			DB((dbg, LEVEL_2, "\n"));
			break;
		}

		/* remove values defined by this instruction from the workset. Values
		 * defined but not in the workset need free registers */
		if (get_irn_mode(node) == mode_T) {
			const ir_edge_t *edge;

			foreach_out_edge(node, edge) {
				ir_node *proj = get_edge_src_irn(edge);
				if (!arch_irn_consider_in_reg_alloc(arch_env, cls, proj))
					continue;
				if (worklist_contains(proj)) {
					worklist_remove(worklist, proj);
				} else {
					++n_defs;
				}
			}
		} else if (arch_irn_consider_in_reg_alloc(arch_env, cls, node)) {
			if (worklist_contains(node)) {
				worklist_remove(worklist, node);
			} else {
				n_defs = 1;
			}
		}

		/* make sure we have enough free registers for the spills */
		make_room(worklist, n_defs);

		/* put all values used by the instruction into the workset */
		arity = get_irn_arity(node);
		for (i = 0; i < arity; ++i) {
			ir_node *use = get_irn_n(node, i);

			if (!arch_irn_consider_in_reg_alloc(arch_env, cls, use))
				continue;

			val_used(worklist, use, node);
		}

		/* we might have too many values in the worklist now and need to
		 * spill some */
		make_room(worklist, 0);

#ifdef DEBUG_libfirm
		print_worklist(worklist, LEVEL_2);
		DB((dbg, LEVEL_2, "\n"));
#endif
	}

	update_max_pressure(worklist);

#ifdef DEBUG_libfirm
	DB((dbg, LEVEL_1, "worklist at begin of %+F:", block));
	print_worklist(worklist, LEVEL_1);
	DB((dbg, LEVEL_1, "\n"));
#endif
}

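/**
 * checks whether two worklists contain the same values in the same order
 */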
static bool worklists_equal(const worklist_t *wl1, const worklist_t *wl2)
{
	/* start at the first entries, not at the list heads */
	const struct list_head *i1 = wl1->live_values.next;
	const struct list_head *i2 = wl2->live_values.next;

	for ( ; i1 != &wl1->live_values && i2 != &wl2->live_values;
			i1 = i1->next, i2 = i2->next) {
		worklist_entry_t *entry1 = list_entry(i1, worklist_entry_t, head);
		worklist_entry_t *entry2 = list_entry(i2, worklist_entry_t, head);

		if (entry1->value != entry2->value)
			return false;
	}
	/* both lists must be at their end */
	if (i1 != &wl1->live_values || i2 != &wl2->live_values)
		return false;

	return true;
}

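/**
 * processes a single block: constructs the end worklist from the most
 * frequently executed successor that was already processed, then runs
 * do_spilling on it
 */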
static void process_block(ir_node *block, void *env)
{
	block_info_t    *block_info      = NULL;
	worklist_t      *worklist        = NULL;
	double           best_execfreq   = -1;
	ir_node         *best_succ_block = NULL;
	int              best_pos        = -1;
	int              n_preds;
	const ir_edge_t *edge;

	(void) env;
	DB((dbg, LEVEL_1, "Processing %+F\n", block));

	/* construct the worklist */
	foreach_block_succ(block, edge) {
		ir_node *succ_block = get_edge_src_irn(edge);
		double   execfreq   = get_block_execfreq(exec_freq, succ_block);

		if (execfreq > best_execfreq) {
			block_info_t *block_info    = get_block_info(succ_block);
			worklist_t   *succ_worklist = block_info->start_worklist;
			if (succ_worklist != NULL) {
				best_execfreq   = execfreq;
				worklist        = succ_worklist;
				best_succ_block = succ_block;
				best_pos        = get_edge_src_pos(edge);
			}
		}
	}
	if (worklist == NULL) {
		/* at least one successor block must have been processed, unless
		 * this is the end block */
		assert(tentative_mode || block == get_irg_end_block(get_irn_irg(block)));

		worklist = new_worklist();
	} else {
		worklist = duplicate_activate_worklist(worklist, block, best_succ_block,
		                                       best_pos);

		/* handled in fix_block_borders later... */
#if 0
		/* now we could have live values in the succ worklists that are not
		 * live anymore in the worklist we picked. We need reloads for them. */
		foreach_block_succ(block, edge) {
			ir_node      *succ_block    = get_edge_src_irn(edge);
			block_info_t *block_info    = get_block_info(succ_block);
			worklist_t   *succ_worklist = block_info->start_worklist;

			if (succ_block == best_succ_block)
				continue;

			process_non_active_worklist(succ_worklist, worklist);
		}
#endif
	}

	block_info = get_block_info(block);

#ifdef DEBUG_libfirm
	DB((dbg, LEVEL_1, "worklist at end of %+F:", block));
	print_worklist(worklist, LEVEL_1);
	DB((dbg, LEVEL_1, "\n"));

	if (should_have_reached_fixpoint) {
		assert(worklists_equal(block_info->end_worklist, worklist));
	}
#endif
	block_info->end_worklist = duplicate_worklist(worklist);

	do_spilling(block, worklist);
	deactivate_worklist(worklist);

#ifdef DEBUG_libfirm
	if (should_have_reached_fixpoint) {
		assert(worklists_equal(block_info->start_worklist, worklist));
	}
#endif
	block_info->start_worklist = worklist;

	/* we shouldn't have any live values left at the start block */
	n_preds = get_Block_n_cfgpreds(block);
	assert(n_preds != 0 || worklist->n_live_values == 0);
}

typedef struct block_or_loop_t block_or_loop_t;
struct block_or_loop_t {
	union {
		ir_node *block;
		ir_loop *loop;
	} v;
	bool is_loop;
};

static block_or_loop_t *loop_blocks;
static ir_loop         *current_loop;

static void find_blocks(ir_node *block);

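/**
 * appends an inner loop as a single element to the block list (if not done
 * already) and continues the search at its exit edges
 */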
static void find_in_loop(ir_loop *loop, ir_node *entry)
{
	loop_info_t     *loop_info = get_loop_info(loop);
	block_or_loop_t  block_or_loop;
	loop_edge_t     *edge;

	/* simply mark one block in the loop to indicate that the loop was
	 * already processed */
	ir_node *some_block = loop_info->entry_edges->block;
	if (Block_block_visited(some_block))
		return;

	block_or_loop.v.loop  = loop;
	block_or_loop.is_loop = true;
	ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);

#ifndef NDEBUG
	{
		/* entry should be one of the loop's entry blocks */
		loop_edge_t *edge  = loop_info->entry_edges;
		bool         found = false;
		for ( ; edge != NULL; edge = edge->next) {
			if (edge->block == entry)
				found = true;
		}
		assert(found);
	}
#endif
	/* check all loop successors */
	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
		ir_node *succ      = edge->block;
		ir_loop *succ_loop = get_irn_loop(succ);

		if (succ_loop == current_loop) {
			find_blocks(succ);
		} else {
			assert(get_loop_depth(succ_loop) < get_loop_depth(current_loop));
		}
	}
}

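/**
 * appends a block to the block list and recursively follows all successors
 * that belong to the current loop; sub-loops are handled by find_in_loop
 */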
static void find_blocks(ir_node *block)
{
	const ir_edge_t *edge;
	block_or_loop_t  block_or_loop;

	if (Block_block_visited(block))
		return;

	block_or_loop.v.block = block;
	block_or_loop.is_loop = false;

	ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);
	mark_Block_block_visited(block);

	foreach_block_succ(block, edge) {
		ir_node *succ = get_edge_src_irn(edge);

		/* is it part of the current loop? */
		ir_loop *loop = get_irn_loop(succ);
		if (loop != current_loop) {
			/* a sub-loop? */
			if (get_loop_depth(loop) > get_loop_depth(current_loop)) {
				find_in_loop(loop, succ);
			} else {
				/* parent loop: we're not interested in the block */
			}
		} else {
			find_blocks(succ);
		}
	}
}

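/**
 * processes a single list element: blocks are spilled with process_block,
 * for already-processed loops only the cached register pressure is merged
 */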
static void process_block_or_loop(const block_or_loop_t *block_or_loop)
{
	if (block_or_loop->is_loop) {
		loop_info_t *loop_info = get_loop_info(block_or_loop->v.loop);

		if (loop_info->max_register_pressure > max_register_pressure)
			max_register_pressure = loop_info->max_register_pressure;

		/* TODO: push unused livethroughs... */
		return;
	}
	process_block(block_or_loop->v.block, NULL);
}

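/**
 * processes a loop: handles all sub-loops first, collects the loop's blocks
 * in a list and runs the tentative passes followed by the final spill pass
 * over that list
 */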
static void process_loop(ir_loop *loop)
{
	int          n_elements = get_loop_n_elements(loop);
	int          i, len;
	loop_info_t *loop_info;
	loop_edge_t *edge;
	ir_node     *some_block;

	/* first handle all sub-loops */
	for (i = 0; i < n_elements; ++i) {
		loop_element element = get_loop_element(loop, i);
		if (*element.kind != k_ir_loop)
			continue;

		process_loop(element.son);
	}

	/* create a postorder of the blocks */
	loop_info = get_loop_info(loop);
	edge      = loop_info->entry_edges;
	if (edge != NULL) {
		some_block = edge->block;
	} else {
		assert(loop == get_irg_loop(current_ir_graph));
		some_block = get_irg_start_block(current_ir_graph);
	}

	loop_blocks  = NEW_ARR_F(block_or_loop_t, 0);
	current_loop = loop;

	ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);
	inc_irg_block_visited(current_ir_graph);
	find_blocks(some_block);
	/* for endless loops the end-block might be unreachable */
	if (loop == get_irg_loop(current_ir_graph)) {
		find_blocks(get_irg_end_block(current_ir_graph));
	}
	ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);

	fprintf(stderr, "Block List for loop %p\n", loop);
	len = ARR_LEN(loop_blocks);
	for (i = 0; i < len; ++i) {
		block_or_loop_t *block_or_loop = &loop_blocks[i];
		if (block_or_loop->is_loop) {
			ir_fprintf(stderr, " L-%p", block_or_loop->v.loop);
		} else {
			ir_fprintf(stderr, " B-%+F", block_or_loop->v.block);
		}
	}
	fprintf(stderr, "\n");

	max_register_pressure = 0;

	/* run1: tentative phase */
	tentative_mode = true;
	for (i = len - 1; i >= 0; --i) {
		process_block_or_loop(&loop_blocks[i]);
	}

	/* run2: tentative phase - should reach fixpoint */
	tentative_mode = true;
	for (i = len - 1; i >= 0; --i) {
		process_block_or_loop(&loop_blocks[i]);
	}

#ifndef NDEBUG
	/* run3: tentative phase - check fixpoint */
	tentative_mode               = true;
	should_have_reached_fixpoint = true;
	for (i = len - 1; i >= 0; --i) {
		process_block_or_loop(&loop_blocks[i]);
	}
	should_have_reached_fixpoint = false;
#endif

	/* run4: add spills/reloads */
	tentative_mode = false;
	for (i = len - 1; i >= 0; --i) {
		process_block_or_loop(&loop_blocks[i]);
	}

	loop_info->max_register_pressure = max_register_pressure;
	ir_fprintf(stderr, "Regpressure in loop %p: %u\n", loop,
	           (unsigned) max_register_pressure);

	DEL_ARR_F(loop_blocks);
}

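/**
 * inserts reloads on incoming control-flow edges for all values that are
 * live at the start of a block but not at the end of the respective
 * predecessor
 */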
static void fix_block_borders(ir_node *block, void *data)
{
	block_info_t *block_info     = get_block_info(block);
	worklist_t   *start_worklist = block_info->start_worklist;
	int           n_cfgpreds     = get_Block_n_cfgpreds(block);
	int           i;

	(void) data;
	assert(start_worklist != NULL);

	for (i = 0; i < n_cfgpreds; ++i) {
		ir_node          *pred_block      = get_Block_cfgpred_block(block, i);
		block_info_t     *pred_block_info = get_block_info(pred_block);
		worklist_t       *end_worklist    = pred_block_info->end_worklist;
		struct list_head *entry;

		assert(end_worklist != NULL);

		/* reload missing values */
		activate_worklist(end_worklist);

		list_for_each(entry, &start_worklist->live_values) {
			worklist_entry_t *wl_entry
				= list_entry(entry, worklist_entry_t, head);
			ir_node          *value = wl_entry->value;

			if (is_Phi(value) && get_nodes_block(value) == block) {
				value = get_irn_n(value, i);

				/* we might have unknowns as arguments for the phi */
				if (!arch_irn_consider_in_reg_alloc(arch_env, cls, value))
					continue;
			}

			if (worklist_contains(value))
				continue;

			be_add_reload_on_edge(senv, value, block, i, cls, 1);
		}

		deactivate_worklist(end_worklist);
	}
}

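/**
 * main entry point of the belady3 spiller for one register class
 */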
static void be_spill_belady3(be_irg_t *birg, const arch_register_class_t *ncls)
{
	ir_graph *irg = be_get_birg_irg(birg);

	cls    = ncls;
	n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);

	/* shortcut for register classes with ignore regs only */
	if (n_regs == 0)
		return;

	arch_env  = be_get_birg_arch_env(birg);
	exec_freq = be_get_birg_exec_freq(birg);

	be_clear_links(irg);
	ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
			| IR_RESOURCE_LOOP_LINK);
	inc_irg_visited(irg);

	obstack_init(&obst);
	senv = be_new_spill_env(birg);

	assure_cf_loop(irg);
	clear_loop_info(get_irg_loop(irg));
	irg_block_walk_graph(irg, construct_loop_edges, NULL, NULL);

	process_loop(get_irg_loop(current_ir_graph));

#if 0
	/* do a post-order walk over the CFG to make sure as many predecessors
	 * as possible are processed before entering a block */
	tentative_mode = 1;
	irg_block_edges_walk(get_irg_start_block(irg), NULL, process_block, NULL);

	tentative_mode = 1;
	irg_block_edges_walk(get_irg_start_block(irg), NULL, process_block, NULL);

	tentative_mode = 0;
	irg_block_edges_walk(get_irg_start_block(irg), NULL, process_block, NULL);
#endif

	irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);

	ir_free_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
			| IR_RESOURCE_LOOP_LINK);

	be_insert_spills_reloads(senv);

	obstack_free(&obst, NULL);

	/* clean up */
	be_delete_spill_env(senv);
}

void be_init_spillbelady3(void)
{
	static be_spiller_t belady3_spiller = {
		be_spill_belady3
	};

	be_register_spiller("belady3", &belady3_spiller);
	FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady3");
}

BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady3);