- simplified
[libfirm] / ir / be / bespillbelady3.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       MIN-algorithm with some twists (aka belady spiller v3)
23  * @author      Matthias Braun
24  * @date        15.01.2008
25  * @version     $Id$
26  *
27  * TODO:
28  *   - handle phis correctly, decide whether we should spill them ~ok
29  *   - merge multiple start worksets of blocks ~ok
30  *   - how to and when to do the tentative phase...
31  */
32 #include "config.h"
33
34 #include <stdbool.h>
35
36 #include "debug.h"
37 #include "list.h"
38 #include "pdeq.h"
39
40 #include "irnode_t.h"
41 #include "irprintf.h"
42 #include "iredges_t.h"
43 #include "irloop_t.h"
44 #include "irgwalk.h"
45 #include "execfreq.h"
46
47 #include "bemodule.h"
48 #include "bespill.h"
49 #include "beutil.h"
50 #include "bespilloptions.h"
51 #include "besched_t.h"
52 #include "be_t.h"
53
54 #ifndef NDEBUG
55 #define EXPENSIVE_CHECKS
56 #endif
57
58 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
59
typedef struct loop_edge_t loop_edge_t;
/** A control-flow edge entering or leaving a loop (singly linked list node). */
struct loop_edge_t {
	ir_node     *block; /**< target block of the edge */
	int          pos;   /**< predecessor position of the edge in block */
	loop_edge_t *next;  /**< next border edge of the same loop */
};

typedef struct loop_info_t loop_info_t;
/** Per-loop data, attached to the loop's link field (see get_loop_info()). */
struct loop_info_t {
	ir_loop     *loop;                  /**< the loop this info belongs to */
	loop_edge_t *entry_edges;           /**< edges entering the loop */
	loop_edge_t *exit_edges;            /**< edges leaving the loop */
	size_t       max_register_pressure; /**< maximum pressure seen inside the loop */
};

typedef struct worklist_entry_t worklist_entry_t;
/** One live value inside a worklist. */
struct worklist_entry_t {
	struct list_head  head;         /**< intrusive list handle */
	ir_node          *value;        /**< the live value */
	ir_node          *reload_point; /**< node before which a reload must be placed */
	ir_loop          *unused_livethrough_loop; /**< loop the value lives through
	                                                unused, or NULL */
};

typedef struct worklist_t worklist_t;
/** The belady workset: list of currently live values. */
struct worklist_t {
	struct list_head  live_values;   /**< list of worklist_entry_t */
	size_t            n_live_values; /**< number of entries in live_values */
	ir_visited_t      visited;       /**< timestamp to avoid processing a
	                                      worklist twice (compared against
	                                      worklist_visited) */
};

typedef struct block_info_t block_info_t;
/** Worksets at the borders of a block, attached to the block's link field. */
struct block_info_t {
	worklist_t *start_worklist; /**< values live at the block's start */
	worklist_t *end_worklist;   /**< values live at the block's end */
};

/** The register class we currently run on. */
static const arch_register_class_t *cls;
/** Obstack holding all spiller-local allocations. */
static struct obstack               obst;
/** Spill environment used to record spills and reloads. */
static spill_env_t                 *senv;
/** Number of registers available in cls. */
static size_t                       n_regs;
/** Highest register pressure encountered so far. */
static size_t                       max_register_pressure;
/** In tentative mode no spills/reloads are actually inserted. */
static bool                         tentative_mode;
/** Debug flag: assert that the worklists no longer change. */
static bool                         should_have_reached_fixpoint;
/** Whether unused livethrough values should be pushed through loops. */
static bool                         do_push_unused_livethroughs;
/** Execution frequency for the current graph. */
static ir_exec_freq                *exec_freq;
/** Timestamp used to mark worklists as visited. */
static ir_visited_t                 worklist_visited;
108
109 static worklist_t *new_worklist(void)
110 {
111         worklist_t *worklist = obstack_alloc(&obst, sizeof(worklist[0]));
112         memset(worklist, 0, sizeof(worklist[0]));
113
114         INIT_LIST_HEAD(&worklist->live_values);
115         worklist->n_live_values    = 0;
116
117         return worklist;
118 }
119
120 static void mark_irn_not_visited(ir_node *node)
121 {
122         set_irn_visited(node, get_irg_visited(current_ir_graph) - 1);
123 }
124
125 static bool worklist_contains(const ir_node *node)
126 {
127         return irn_visited(node);
128 }
129
130 static block_info_t *get_block_info(ir_node *block)
131 {
132         block_info_t *info = get_irn_link(block);
133         if (info != NULL)
134                 return info;
135
136         info = obstack_alloc(&obst, sizeof(info[0]));
137         memset(info, 0, sizeof(info[0]));
138         set_irn_link(block, info);
139         return info;
140 }
141
142 static void deactivate_worklist(const worklist_t *worklist)
143 {
144         struct list_head *entry;
145
146         list_for_each(entry, &worklist->live_values) {
147                 worklist_entry_t *wl_entry
148                         = list_entry(entry, worklist_entry_t, head);
149                 assert(worklist_contains(wl_entry->value));
150                 mark_irn_not_visited(wl_entry->value);
151                 set_irn_link(wl_entry->value, NULL);
152         }
153 }
154
155 static void activate_worklist(const worklist_t *worklist)
156 {
157         struct list_head *entry;
158
159         list_for_each(entry, &worklist->live_values) {
160                 worklist_entry_t *wl_entry
161                         = list_entry(entry, worklist_entry_t, head);
162                 assert(!worklist_contains(wl_entry->value));
163                 mark_irn_visited(wl_entry->value);
164                 set_irn_link(wl_entry->value, wl_entry);
165         }
166 }
167
168 /**
169  * duplicate a worklist and directly activates it
170  */
171 static void fill_and_activate_worklist(worklist_t *new_worklist,
172                 const worklist_t *worklist, ir_node *block, ir_node *succ_block,
173                 int succ_pos)
174 {
175         ir_node          *reload_point  = NULL;
176         size_t            n_live_values = 0;
177         struct list_head *entry;
178
179         if (succ_block != NULL &&
180                         (get_Block_n_cfgpreds(succ_block) > 1
181                          || get_irn_n_edges_kind(block, EDGE_KIND_BLOCK) > 1)) {
182                 reload_point = be_get_end_of_block_insertion_point(block);
183         }
184
185         list_for_each(entry, &worklist->live_values) {
186                 worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
187                 ir_node          *value     = wl_entry->value;
188                 worklist_entry_t *new_entry;
189
190                 if (new_worklist->n_live_values >= n_regs)
191                         break;
192
193                 if (is_Phi(value) && get_nodes_block(value) == succ_block) {
194                         value = get_Phi_pred(value, succ_pos);
195
196                         /* can happen for unknown phi preds */
197                         if (!arch_irn_consider_in_reg_alloc(cls, value))
198                                 continue;
199                 }
200
201                 if (irn_visited_else_mark(value))
202                         continue;
203
204                 new_entry = obstack_alloc(&obst, sizeof(new_entry[0]));
205                 memset(new_entry, 0, sizeof(new_entry[0]));
206
207                 new_entry->value = value;
208                 if (reload_point != NULL) {
209                         new_entry->reload_point = reload_point;
210                 } else {
211                         new_entry->reload_point = wl_entry->reload_point;
212                 }
213
214                 list_add_tail(&new_entry->head, &new_worklist->live_values);
215                 ++n_live_values;
216
217                 set_irn_link(value, new_entry);
218                 new_worklist->n_live_values++;
219         }
220 }
221
222 static worklist_t *duplicate_worklist(const worklist_t *worklist)
223 {
224         worklist_t       *new_worklist;
225         struct list_head *entry;
226
227         new_worklist = obstack_alloc(&obst, sizeof(new_worklist[0]));
228         memset(new_worklist, 0, sizeof(new_worklist[0]));
229         INIT_LIST_HEAD(&new_worklist->live_values);
230         new_worklist->n_live_values = worklist->n_live_values;
231
232         list_for_each(entry, &worklist->live_values) {
233                 worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
234                 worklist_entry_t *new_entry
235                         = obstack_alloc(&obst, sizeof(new_entry[0]));
236
237                 memcpy(new_entry, wl_entry, sizeof(new_entry[0]));
238                 list_add_tail(&new_entry->head, &new_worklist->live_values);
239         }
240
241         return new_worklist;
242 }
243
#ifdef DEBUG_libfirm
/** Print the values of @p worklist to the debug output at @p level. */
static void print_worklist(const worklist_t *worklist, int level)
{
	struct list_head *entry;

	/* n_live_values is a size_t; cast avoids a %d format mismatch on LP64 */
	DB((dbg, level, "%d values: ", (int)worklist->n_live_values));
	list_for_each(entry, &worklist->live_values) {
		worklist_entry_t *wl_entry
			= list_entry(entry, worklist_entry_t, head);

		DB((dbg, level, "%+F ", wl_entry->value));
	}
}
#endif
258
259
260 /**
261  * Clear the link fields of a loop and all
262  * its child loops.
263  *
264  * @param loop  the root loop
265  */
266 static void clear_loop_info(ir_loop *loop)
267 {
268         int n_elements = get_loop_n_elements(loop);
269         int i;
270
271         loop->link = NULL;
272         for (i = 0; i < n_elements; ++i) {
273                 loop_element element = get_loop_element(loop, i);
274                 if (*element.kind != k_ir_loop)
275                         continue;
276
277                 clear_loop_info(element.son);
278         }
279 }
280
281 static loop_info_t *get_loop_info(ir_loop *loop)
282 {
283         loop_info_t *info = loop->link;
284         if (info != NULL)
285                 return info;
286
287         info = obstack_alloc(&obst, sizeof(info[0]));
288         memset(info, 0, sizeof(info[0]));
289         info->loop = loop;
290         loop->link = info;
291         return info;
292 }
293
/**
 * constructs loop in+out edges when called in a block walker
 *
 * For every CFG predecessor of @p block that lies in a different loop, the
 * edge is recorded as an entry or exit edge on every loop between the deeper
 * and the shallower loop.
 */
static void construct_loop_edges(ir_node *block, void *data)
{
	int      n_cfgpreds = get_Block_n_cfgpreds(block);
	ir_loop *loop       = get_irn_loop(block);
	int      i;

	(void) data;

	for (i = 0; i < n_cfgpreds; ++i) {
		ir_node     *cfgpred_block = get_Block_cfgpred_block(block, i);
		ir_loop     *cfgpred_loop  = get_irn_loop(cfgpred_block);
		loop_edge_t *edge;
		bool        is_exit_edge;
		ir_loop     *l, *goal;

		/* predecessor in the same loop: not a loop border edge */
		if (cfgpred_loop == loop)
			continue;

		/* critical edges are splitted, so we can't jump from 1 loop
		 * directly into another without going out of it */
		assert(get_loop_depth(cfgpred_loop) != get_loop_depth(loop));

		/* edge out of loop */
		if (get_loop_depth(cfgpred_loop) > get_loop_depth(loop)) {
			is_exit_edge = true;
			l            = cfgpred_loop;
			goal         = loop;
		} else {
			/* edge into the loop */
			is_exit_edge = false;
			l            = loop;
			goal         = cfgpred_loop;
		}

		/* this might be a jump out/in multiple loops: walk from the deeper
		 * loop l outwards to goal and record the edge on each loop passed */
		do {
			loop_info_t *l_info = get_loop_info(l);

			edge = obstack_alloc(&obst, sizeof(edge[0]));
			memset(edge, 0, sizeof(edge[0]));
			edge->block = block;
			edge->pos   = i;

			if (is_exit_edge) {
				edge->next         = l_info->exit_edges;
				l_info->exit_edges = edge;
				assert(get_loop_depth(loop) < get_loop_depth(l));
			} else {
				edge->next          = l_info->entry_edges;
				l_info->entry_edges = edge;
				assert(get_loop_depth(cfgpred_loop) < get_loop_depth(l));
			}
			l = get_loop_outer_loop(l);
		} while (l != goal);
	}
}
352
353
354
355
356 static void place_reload(worklist_entry_t *entry)
357 {
358         if (tentative_mode)
359                 return;
360
361         DB((dbg, LEVEL_1, "reload of %+F before %+F\n", entry->value,
362                 entry->reload_point));
363         assert(entry->reload_point != NULL);
364         be_add_reload(senv, entry->value, entry->reload_point, cls, 1);
365         entry->reload_point = NULL;
366 }
367
368 /**
369  * makes sure the worklist contains not more than n_regs - room_needed entries
370  */
371 static void make_room(worklist_t *worklist, size_t room_needed)
372 {
373         int               i;
374         int               spills_needed;
375         struct list_head *entry;
376
377         spills_needed = worklist->n_live_values + room_needed - n_regs;
378         if (spills_needed <= 0)
379                 return;
380
381         entry = worklist->live_values.next;
382         for(i = spills_needed; i > 0; --i) {
383                 struct list_head *next = entry->next;
384                 worklist_entry_t *wl_entry
385                         = list_entry(entry, worklist_entry_t, head);
386
387                 assert(worklist_contains(wl_entry->value));
388                 mark_irn_not_visited(wl_entry->value);
389                 place_reload(wl_entry);
390                 list_del(entry);
391
392                 entry = next;
393         }
394         worklist->n_live_values -= spills_needed;
395 }
396
/**
 * a value was used, so bring it to the back of the worklist (which might
 * result in a spill of another value).
 *
 * @param worklist     the current workset
 * @param value        the value that was used
 * @param sched_point  the using node; a reload would have to go before it
 */
static void val_used(worklist_t *worklist, ir_node *value, ir_node *sched_point)
{
	/* already in the worklist? move around, otherwise add at back */
	worklist_entry_t *entry = get_irn_link(value);

	assert(arch_irn_consider_in_reg_alloc(cls, value));

	if (worklist_contains(value)) {
		/* live value: unlink so it can be re-appended at the back (MRU) */
		assert(entry != NULL);

		list_del(&entry->head);
	} else {
		/* value becomes live; reuse a linked entry if one exists, otherwise
		 * allocate a fresh one */
		if (entry == NULL) {
			entry        = obstack_alloc(&obst, sizeof(entry[0]));
			memset(entry, 0, sizeof(entry[0]));

			entry->value = value;
			set_irn_link(value, entry);
		}

		++worklist->n_live_values;
		mark_irn_visited(value);
	}

	/* remember where a reload would have to be placed */
	entry->reload_point = sched_point;
	list_add_tail(&entry->head, &worklist->live_values);
}
428
429 static void worklist_remove(worklist_t *worklist, ir_node *value)
430 {
431         worklist_entry_t *entry = get_irn_link(value);
432         assert(entry != NULL);
433         list_del(&entry->head);
434         --worklist->n_live_values;
435
436         assert(worklist_contains(value));
437         mark_irn_not_visited(value);
438 }
439
440 static void update_max_pressure(worklist_t *worklist)
441 {
442         if (worklist->n_live_values > max_register_pressure)
443                 max_register_pressure = worklist->n_live_values;
444 }
445
/**
 * Walk the schedule of @p block from back to front, simulating a workset of
 * at most n_regs values: definitions leave the set, uses (re-)enter it, and
 * values pushed out get reloads placed via make_room()/place_reload().
 * On entry @p worklist holds the values live at the block end; on return it
 * describes the block start.
 */
static void do_spilling(ir_node *block, worklist_t *worklist)
{
	ir_node *node;

	assert(worklist != NULL);

	sched_foreach_reverse(block, node) {
		int    i, arity;
		size_t n_defs = 0;

		DB((dbg, LEVEL_2, "\t%+F... ", node));
		update_max_pressure(worklist);

		if (is_Phi(node)) {
			ir_node *node2;
			/* TODO: if we have some free registers, then we could decide to
			 * not spill some phis (but not for phis where at least 1 input is
			 * themselfes) */

			/* we have to spill all phis that are not live */
			sched_foreach_reverse_from(node, node2) {
				assert(is_Phi(node2));

				if (worklist_contains(node2))
					continue;
				if (!arch_irn_consider_in_reg_alloc(cls, node2))
					continue;

				if (!tentative_mode)
					be_spill_phi(senv, node2);
			}
			DB((dbg, LEVEL_2, "\n"));
			/* phis sit at the schedule head; nothing before them to process */
			break;
		}

		/* remove values defined by this instruction from the workset. Values
		 * defined but not in the workset need free registers */
		if (get_irn_mode(node) == mode_T) {
			/* multi-result node: examine each Proj individually */
			const ir_edge_t *edge;

			foreach_out_edge(node, edge) {
				ir_node *proj = get_edge_src_irn(edge);
				if (!arch_irn_consider_in_reg_alloc(cls, proj))
					continue;
				if (worklist_contains(proj)) {
					worklist_remove(worklist, proj);
				} else {
					++n_defs;
				}
			}
		} else if (arch_irn_consider_in_reg_alloc(cls, node)) {
			if (worklist_contains(node)) {
				worklist_remove(worklist, node);
			} else {
				n_defs = 1;
			}
		}

		/* make sure we have enough free registers for the spills */
		make_room(worklist, n_defs);

		/* put all values used by the instruction into the workset */
		arity = get_irn_arity(node);
		for(i = 0; i < arity; ++i) {
			ir_node *use = get_irn_n(node, i);

			if (!arch_irn_consider_in_reg_alloc(cls, use))
				continue;

			val_used(worklist, use, node);
		}

		/* we might have too many values in the worklist now and need to spill
		 * some */
		make_room(worklist, 0);

#ifdef DEBUG_libfirm
		print_worklist(worklist, LEVEL_2);
		DB((dbg, LEVEL_2, "\n"));
#endif
	}

	update_max_pressure(worklist);

#ifdef DEBUG_libfirm
	DB((dbg, LEVEL_1, "worklist at begin of %+F:", block));
	print_worklist(worklist, LEVEL_1);
	DB((dbg, LEVEL_1, "\n"));
#endif
}
536
537 static bool worklists_equal(const worklist_t *wl1, const worklist_t *wl2)
538 {
539         const struct list_head *i1 = &wl1->live_values;
540         const struct list_head *i2 = &wl2->live_values;
541
542         for ( ; i1 != &wl1->live_values && i2 != &wl2->live_values;
543                         i1 = i1->next, i2 = i2->next) {
544                 worklist_entry_t *entry1 = list_entry(i1, worklist_entry_t, head);
545                 worklist_entry_t *entry2 = list_entry(i2, worklist_entry_t, head);
546
547                 if (entry1->value != entry2->value)
548                         return false;
549         }
550         /* both lists at the end */
551         if (i1 != &wl1->live_values || i2 != &wl2->live_values)
552                 return false;
553
554         return true;
555 }
556
557 static bool fill_start_worklist(worklist_t *new_worklist, ir_node *block)
558 {
559         double           best_execfreq   = -1;
560         worklist_t      *best_worklist   = NULL;
561         ir_node         *best_succ_block = NULL;
562         int              best_pos;
563         const ir_edge_t *edge;
564
565         /* construct worklist */
566         foreach_block_succ(block, edge) {
567                 ir_node      *succ_block = get_edge_src_irn(edge);
568                 double       execfreq    = get_block_execfreq(exec_freq, succ_block);
569                 block_info_t *block_info;
570                 worklist_t   *succ_worklist;
571
572                 if (execfreq < best_execfreq)
573                         continue;
574
575                 block_info    = get_block_info(succ_block);
576                 succ_worklist = block_info->start_worklist;
577
578                 if (succ_worklist == NULL || succ_worklist->visited >= worklist_visited)
579                         continue;
580
581                 best_execfreq   = execfreq;
582                 best_worklist   = succ_worklist;
583                 best_succ_block = succ_block;
584                 best_pos        = get_edge_src_pos(edge);
585         }
586
587         if (best_worklist == NULL)
588                 return false;
589         best_worklist->visited = worklist_visited;
590
591         fill_and_activate_worklist(new_worklist, best_worklist, block,
592                         best_succ_block, best_pos);
593         return true;
594 }
595
596 static worklist_t *construct_start_worklist(ir_node *block)
597 {
598         worklist_t *worklist = new_worklist();
599
600         ++worklist_visited;
601
602         while(fill_start_worklist(worklist, block)) {
603                 if (worklist->n_live_values >= n_regs)
604                         break;
605         }
606
607         return worklist;
608 }
609
/**
 * Block walker: construct the workset at the block end from the successors'
 * start worksets, then run the backwards spilling simulation through the
 * block.  Stores both border worksets in the block's block_info_t.
 */
static void process_block(ir_node *block, void *env)
{
	block_info_t    *block_info;
	worklist_t      *worklist;
	int              n_preds;

	(void) env;
	DB((dbg, LEVEL_1, "Processing %+F\n", block));

	worklist   = construct_start_worklist(block);
	block_info = get_block_info(block);

#ifdef DEBUG_libfirm
	DB((dbg, LEVEL_1, "worklist at end of %+F:", block));
	print_worklist(worklist, LEVEL_1);
	DB((dbg, LEVEL_1, "\n"));

	if (should_have_reached_fixpoint) {
		assert(worklists_equal(block_info->end_worklist, worklist));
	}
#endif
	/* remember the end workset before the simulation mutates it */
	block_info->end_worklist = duplicate_worklist(worklist);

	do_spilling(block, worklist);
	deactivate_worklist(worklist);

#ifdef DEBUG_libfirm
	if (should_have_reached_fixpoint) {
		assert(worklists_equal(block_info->start_worklist, worklist));
	}
#endif
	/* after the backwards walk the worklist describes the block start */
	block_info->start_worklist = worklist;

	/* we shouldn't have any live values left at the start block */
	n_preds = get_Block_n_cfgpreds(block);
	//assert(n_preds != 0 || worklist->n_live_values == 0);
}
647
typedef union block_or_loop_t block_or_loop_t;
/** An element of the processing order: either a plain block or a sub-loop. */
union block_or_loop_t {
	ir_node *block; /**< valid when the element is a block */
	ir_loop *loop;  /**< valid when the element is a loop */
};

/** Flexible array collecting blocks and sub-loops of the current loop. */
static block_or_loop_t *loop_blocks;
/** The loop whose blocks are currently being collected. */
static ir_loop         *current_loop;

static void find_blocks(ir_node *block);
658
659 static void find_in_loop(ir_loop *loop, ir_node *entry)
660 {
661         loop_info_t     *loop_info = get_loop_info(loop);
662         block_or_loop_t block_or_loop;
663         loop_edge_t     *edge;
664
665         /* simply mark 1 block in the loop to indicate that the loop was already
666          * processed */
667         ir_node *some_block = loop_info->entry_edges->block;
668         if (Block_block_visited(some_block))
669                 return;
670
671         block_or_loop.loop = loop;
672         ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);
673
674 #ifndef NDEBUG
675         {
676                 /* we should be 1 of the entry blocks */
677                 loop_edge_t *edge  = loop_info->entry_edges;
678                 bool         found = false;
679                 for ( ; edge != NULL; edge = edge->next) {
680                         if (edge->block == entry) {
681                                 found = true;
682                                 break;
683                         }
684                 }
685                 assert(found);
686         }
687 #else
688         (void) entry;
689 #endif
690
691         /* check all loop successors */
692         for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
693                 ir_node *succ      = edge->block;
694                 ir_loop *succ_loop = get_irn_loop(succ);
695
696                 if (succ_loop == current_loop) {
697                         find_blocks(succ);
698                 } else {
699                         assert(get_loop_depth(succ_loop) < get_loop_depth(current_loop));
700                 }
701         }
702 }
703
/**
 * Depth-first collection of all blocks of the current loop reachable from
 * @p block into the loop_blocks array; blocks of sub-loops are represented
 * by a single loop element (see find_in_loop()).
 */
static void find_blocks(ir_node *block)
{
	const ir_edge_t *edge;
	block_or_loop_t block_or_loop;

	if (Block_block_visited(block))
		return;

	block_or_loop.block = block;

	ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);
	mark_Block_block_visited(block);

	foreach_block_succ(block, edge) {
		ir_node *succ = get_edge_src_irn(edge);

		/* is it part of the current loop? */
		ir_loop *loop = get_irn_loop(succ);
		if (loop != current_loop) {
			/* a sub-loop? */
			if (get_loop_depth(loop) > get_loop_depth(current_loop)) {
				find_in_loop(loop, succ);
			} else {
				/* parent loop: we're not interested in the block */
			}
		} else {
			find_blocks(succ);
		}
	}
}
734
735 /**
736  * append an entry to a worklist. WARNING: The entry must not already be in the
737  * worklist.
738  */
739 static void worklist_append(worklist_t *worklist, ir_node *value,
740                             ir_node *reload_point,
741                                                         ir_loop *unused_livethrough_loop)
742 {
743         worklist_entry_t *entry     = obstack_alloc(&obst, sizeof(entry[0]));
744         memset(entry, 0, sizeof(entry[0]));
745
746 #ifdef EXPENSIVE_CHECKS
747         {
748                 struct list_head *entry;
749                 list_for_each(entry, &worklist->live_values) {
750                         worklist_entry_t *wl_entry
751                                 = list_entry(entry, worklist_entry_t, head);
752                         assert(wl_entry->value != value);
753                 }
754         }
755 #endif
756
757         entry->value                   = value;
758         entry->reload_point            = reload_point;
759         entry->unused_livethrough_loop = unused_livethrough_loop;
760         list_add_tail(&entry->head, &worklist->live_values);
761         ++worklist->n_live_values;
762         assert(worklist->n_live_values <= n_regs);
763 }
764
/**
 * Add the unused livethrough @p value to the worklists at all exit and entry
 * borders of the loop described by @p loop_info, and account for the extra
 * register it occupies inside the loop.
 */
static void push_unused_livethrough(loop_info_t *loop_info, ir_node *value)
{
	loop_edge_t *edge;
	++worklist_visited;

	/* add the value to all loop exit and entry blocks */
	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
		ir_node            *block
			= get_Block_cfgpred_block(edge->block, edge->pos);
		const block_info_t *info     = get_block_info(block);
		worklist_t         *worklist = info->end_worklist;
		ir_node            *reload_point = NULL;

		/* touch each worklist at most once per pushed value */
		if (worklist->visited >= worklist_visited)
			continue;
		worklist->visited = worklist_visited;

		/* TODO: we need a smarter mechanism here, that makes the reloader place
		 * reload nodes on all loop exits... */

		worklist_append(worklist, value, reload_point, loop_info->loop);
	}
	edge = loop_info->entry_edges;
	for ( ; edge != NULL; edge = edge->next) {
		ir_node            *entry_block = edge->block;
		const block_info_t *info        = get_block_info(entry_block);
		worklist_t         *worklist    = info->start_worklist;
		ir_node            *pred_block;
		ir_node            *reload_point;

		if (worklist->visited >= worklist_visited)
			continue;
		worklist->visited = worklist_visited;

		/* reload at the end of the block before the loop entry */
		pred_block   = get_Block_cfgpred_block(entry_block, edge->pos);
		reload_point = be_get_end_of_block_insertion_point(pred_block);

		worklist_append(worklist, value, reload_point, loop_info->loop);
	}

	set_irn_link(value, NULL);
	++loop_info->max_register_pressure;
}
808
/**
 * For a loop whose maximum register pressure left registers free, find values
 * that live through the loop without being used and push them into the border
 * worksets so they can stay in registers across the loop.
 */
static void push_unused_livethroughs(loop_info_t *loop_info)
{
	loop_edge_t *edge;

	/* we can only push unused livethroughs if register pressure inside the loop
	 * was low enough */
	if (loop_info->max_register_pressure >= n_regs)
		return;

	/* find unused livethroughs: register pressure in the loop was low enough
	 * which means that we had no spills, which implies that at every point in
	 * the loop all live values fit into registers */
	for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
		ir_node            *block          = edge->block;
		const block_info_t *info           = get_block_info(block);
		worklist_t         *start_worklist = info->start_worklist;
		ir_node            *exit_block;
		const block_info_t *exit_info;
		worklist_t         *end_worklist;
		struct list_head   *entry;

		if (start_worklist == NULL)
			continue;

		exit_block   = get_Block_cfgpred_block(edge->block, edge->pos);
		exit_info    = get_block_info(exit_block);
		end_worklist = exit_info->end_worklist;

		activate_worklist(end_worklist);
		/* all values contained in the start_worklist, which are not available
		 * in the end_worklist, must be unused livethroughs */

		list_for_each(entry, &start_worklist->live_values) {
			worklist_entry_t *wl_entry
				= list_entry(entry, worklist_entry_t, head);
			ir_node *value = wl_entry->value;

			/* stop once pushing would exhaust the registers */
			if (loop_info->max_register_pressure >= n_regs)
				break;


			if (!worklist_contains(value)) {
				/* add it to all loop exits */
				/* NOTE(review): %d with size_t max_register_pressure is a
				 * format mismatch on LP64 — confirm and cast if needed */
				DB((dbg, LEVEL_2, "Found unused livethrough: %+F (regpressure in loop %d)\n", value, loop_info->max_register_pressure));
				push_unused_livethrough(loop_info, value);
				/* the value should now be part of end_worklist, so mark it */
				mark_irn_visited(value);
			}
		}

		deactivate_worklist(end_worklist);
	}
}
862
863 static void process_block_or_loop(const block_or_loop_t block_or_loop)
864 {
865         if (block_or_loop.loop->kind == k_ir_loop) {
866                 loop_info_t *loop_info = get_loop_info(block_or_loop.loop);
867
868                 if (do_push_unused_livethroughs)
869                         push_unused_livethroughs(loop_info);
870
871                 if (loop_info->max_register_pressure > max_register_pressure)
872                         max_register_pressure = loop_info->max_register_pressure;
873
874                 return;
875         }
876         process_block(block_or_loop.block, NULL);
877 }
878
/**
 * Process a loop: first recurse into all sub-loops, then build a
 * postorder list of the loop's own blocks/sub-loops and run the spill
 * passes over that list - two tentative runs (which should reach a
 * fixpoint), an NDEBUG-only run verifying the fixpoint, and a final
 * real run that inserts spills/reloads. The maximum register pressure
 * seen during the runs is recorded in the loop's loop_info.
 */
static void process_loop(ir_loop *loop)
{
	int         n_elements = get_loop_n_elements(loop);
	int         i, len;
	loop_info_t *loop_info;
	loop_edge_t *edge;
	ir_node     *some_block;

	/* first handle all sub-loops */
	for (i = 0; i < n_elements; ++i) {
		loop_element element = get_loop_element(loop, i);
		if (*element.kind != k_ir_loop)
			continue;

		process_loop(element.son);
	}

	/* create a postorder of the blocks */
	loop_info = get_loop_info(loop);
	edge      = loop_info->entry_edges;
	if (edge != NULL) {
		/* start the block search at any block the loop is entered from */
		some_block = edge->block;
	} else {
		/* only the outermost (whole-graph) "loop" has no entry edges;
		 * fall back to the graph's start block then */
		assert(loop == get_irg_loop(current_ir_graph));
		some_block = get_irg_start_block(current_ir_graph);
	}

	loop_blocks  = NEW_ARR_F(block_or_loop_t,0);
	current_loop = loop;

	/* find_blocks() uses the block-visited flag to avoid revisiting */
	ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);
	inc_irg_block_visited(current_ir_graph);
	find_blocks(some_block);
	/* for endless loops the end-block might be unreachable */
	if (loop == get_irg_loop(current_ir_graph)) {
		find_blocks(get_irg_end_block(current_ir_graph));
	}
	ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);

	/* dump the collected postorder for debugging */
	DB((dbg, LEVEL_3, "Block List for loop %p\n", loop));
	len = ARR_LEN(loop_blocks);
	for (i = 0; i < len; ++i) {
		block_or_loop_t block_or_loop = loop_blocks[i];
		if (block_or_loop.loop->kind == k_ir_loop) {
			DB((dbg, LEVEL_3, " L-%p", block_or_loop.loop));
		} else {
			DB((dbg, LEVEL_3, " B-%+F", block_or_loop.block));
		}
	}
	DB((dbg, LEVEL_3, "\n"));

	max_register_pressure = 0;

	/* run1: tentative phase */
	tentative_mode = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(loop_blocks[i]);
	}

	/* run2: tentative phase - should reach fixpoint */
	tentative_mode = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(loop_blocks[i]);
	}

#ifndef NDEBUG
	/* run3: tentative phase - check fixpoint */
	tentative_mode               = true;
	should_have_reached_fixpoint = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(loop_blocks[i]);
	}
	should_have_reached_fixpoint = false;
#endif

	/* run4: add spills/reloads */
	tentative_mode              = false;
	do_push_unused_livethroughs = true;
	for (i = len-1; i >= 0; --i) {
		process_block_or_loop(loop_blocks[i]);
	}
	do_push_unused_livethroughs = false;

	/* remember the pressure so enclosing loops can aggregate it */
	loop_info->max_register_pressure = max_register_pressure;
	DB((dbg, LEVEL_2, "Regpressure in loop %p: %u\n", loop,
				(unsigned) max_register_pressure));

	DEL_ARR_F(loop_blocks);
}
968
/**
 * Block walker: for every control-flow predecessor edge, insert reloads
 * for all values that are live at the start of @p block but are not
 * available at the end of the predecessor.
 *
 * @param block  the block whose incoming edges are fixed up
 * @param data   unused walker argument
 */
static void fix_block_borders(ir_node *block, void *data)
{
	block_info_t *block_info     = get_block_info(block);
	worklist_t   *start_worklist = block_info->start_worklist;
	int           n_cfgpreds     = get_Block_n_cfgpreds(block);
	ir_loop      *loop           = get_irn_loop(block);
	int           i;

	(void) data;
	assert(start_worklist != NULL);

	for (i = 0; i < n_cfgpreds; ++i) {
		ir_node      *pred_block      = get_Block_cfgpred_block(block, i);
		block_info_t *pred_block_info = get_block_info(pred_block);
		worklist_t   *end_worklist    = pred_block_info->end_worklist;
		ir_loop      *pred_loop       = get_irn_loop(pred_block);
		bool          is_loop_entry   = false;
		struct list_head *entry;

		assert(end_worklist != NULL);

		/* an edge coming from a shallower loop nest enters this loop */
		if (get_loop_depth(pred_loop) < get_loop_depth(loop)) {
			is_loop_entry = true;
		}

		/* reload missing values */
		activate_worklist(end_worklist);

		list_for_each(entry, &start_worklist->live_values) {
			worklist_entry_t *wl_entry
				= list_entry(entry, worklist_entry_t, head);
			ir_node          *value = wl_entry->value;

			/* for phis of this block, the relevant value on this edge is
			 * the phi argument coming from the i-th predecessor */
			if (is_Phi(value) && get_nodes_block(value) == block) {
				value = get_irn_n(value, i);

				/* we might have unknowns as argument for the phi */
				if (!arch_irn_consider_in_reg_alloc(cls, value))
					continue;
			}

			/* already available at the end of the predecessor */
			if (worklist_contains(value))
				continue;
			/* unused livethroughs only need reloads on loop-entry edges */
			if (wl_entry->unused_livethrough_loop != NULL && !is_loop_entry)
				continue;

			be_add_reload_on_edge(senv, value, block, i, cls, 1);
		}

		deactivate_worklist(end_worklist);
	}
}
1021
1022 /**
1023  * Run the spill algorithm.
1024  *
1025  * @param birg  the graph to run on
1026  * @param ncls  the register class to run on
1027  */
1028 static void be_spill_belady3(be_irg_t *birg, const arch_register_class_t *ncls)
1029 {
1030         ir_graph *irg = be_get_birg_irg(birg);
1031
1032         cls    = ncls;
1033         n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
1034
1035         /* shortcut for register classes with ignore regs only */
1036         if (n_regs == 0)
1037                 return;
1038
1039         worklist_visited = 0;
1040         exec_freq        = be_get_birg_exec_freq(birg);
1041
1042         be_clear_links(irg);
1043         ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
1044                         | IR_RESOURCE_LOOP_LINK);
1045         inc_irg_visited(irg);
1046
1047         obstack_init(&obst);
1048         senv = be_new_spill_env(birg);
1049
1050         assure_cf_loop(irg);
1051         clear_loop_info(get_irg_loop(irg));
1052         irg_block_walk_graph(irg, construct_loop_edges, NULL, NULL);
1053
1054         process_loop(get_irg_loop(irg));
1055
1056         irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);
1057
1058         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
1059                         | IR_RESOURCE_LOOP_LINK);
1060
1061         be_insert_spills_reloads(senv);
1062
1063         obstack_free(&obst, NULL);
1064
1065         /* clean up */
1066         be_delete_spill_env(senv);
1067 }
1068
1069 void be_init_spillbelady3(void)
1070 {
1071         static be_spiller_t belady3_spiller = {
1072                 be_spill_belady3
1073         };
1074
1075         be_register_spiller("belady3", &belady3_spiller);
1076         FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady3");
1077 }
1078
1079 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady3);