/* fehler109 — [libfirm] / ir / be / bespillbelady3.c (code-browser page header, kept as a comment) */
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       MIN-algorithm with some twists (aka belady spiller v3)
23  * @author      Matthias Braun
24  * @date        15.01.2008
25  * @version     $Id$
26  *
27  * TODO:
28  *   - handle phis correctly, decide whether we should spill them ~ok
29  *   - merge multiple start worksets of blocks ~ok
30  *   - how to and when to do the tentative phase...
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include "debug.h"
37 #include "list.h"
38 #include "pdeq.h"
39
40 #include "irnode_t.h"
41 #include "irprintf.h"
42 #include "iredges_t.h"
43 #include "execfreq.h"
44
45 #include "bemodule.h"
46 #include "bespill.h"
47 #include "beutil.h"
48 #include "bespilloptions.h"
49 #include "besched_t.h"
50 #include "be_t.h"
51
/** debug module handle (only exists in debug builds) */
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

typedef struct worklist_entry_t worklist_entry_t;
/** One live value in a worklist; also reachable via the value's link field. */
struct worklist_entry_t {
	struct list_head  head;         /**< list handle inside worklist_t::live_values */
	ir_node          *value;        /**< the value that is live */
	unsigned          timestep;     /**< timestep of the most recent use seen
	                                     during the backward walk */
	ir_node          *reload_point; /**< node before which a reload has to be
	                                     placed if the value gets evicted */
};

typedef struct worklist_t worklist_t;
/** The set of values assumed to be in registers (the "workset"). */
struct worklist_t {
	struct list_head  live_values;      /**< list of worklist_entry_t; entries
	                                         are appended at the tail on use,
	                                         so the front holds the least
	                                         recently used values */
	size_t            n_live_values;    /**< number of entries in live_values */
	unsigned          current_timestep; /**< incremented per simulated node */
};

/* spiller-wide state, valid for one be_spill_belady3() invocation */
static const arch_env_t            *arch_env;       /* architecture callbacks */
static const arch_register_class_t *cls;            /* register class being processed */
static struct obstack               obst;           /* allocator for worklists/entries */
static spill_env_t                 *senv;           /* collects spills and reloads */
static size_t                       n_regs;         /* usable registers in cls */
static int                          tentative_mode; /* 1: simulate only, place nothing */
static ir_exec_freq                *exec_freq;      /* block execution frequencies */
76
77 static worklist_t *new_worklist(void)
78 {
79         worklist_t *worklist = obstack_alloc(&obst, sizeof(worklist[0]));
80
81         INIT_LIST_HEAD(&worklist->live_values);
82         worklist->n_live_values    = 0;
83         worklist->current_timestep = 0;
84
85         return worklist;
86 }
87
88 static void mark_irn_not_visited(ir_node *node)
89 {
90         set_irn_visited(node, get_irg_visited(current_ir_graph) - 1);
91 }
92
93 static void deactivate_worklist(const worklist_t *worklist)
94 {
95         struct list_head *entry;
96
97         list_for_each(entry, &worklist->live_values) {
98                 worklist_entry_t *wl_entry
99                         = list_entry(entry, worklist_entry_t, head);
100                 assert(irn_visited(wl_entry->value));
101                 mark_irn_not_visited(wl_entry->value);
102                 set_irn_link(wl_entry->value, NULL);
103         }
104 }
105
106 static worklist_t *duplicate_activate_worklist(const worklist_t *worklist,
107                 ir_node *block, ir_node *succ_block, int succ_pos)
108 {
109         ir_node          *reload_point  = NULL;
110         size_t            n_live_values = 0;
111         worklist_t       *new_worklist;
112         struct list_head *entry;
113
114         if(succ_block != NULL && get_Block_n_cfgpreds(succ_block) > 1) {
115                 ir_fprintf(stderr, "adjust reload point from %+F to %+F\n", succ_block, block);
116                 reload_point = be_get_end_of_block_insertion_point(block);
117         }
118
119         new_worklist = obstack_alloc(&obst, sizeof(new_worklist[0]));
120         INIT_LIST_HEAD(&new_worklist->live_values);
121         new_worklist->current_timestep = worklist->current_timestep;
122
123         list_for_each(entry, &worklist->live_values) {
124                 worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
125                 ir_node          *value     = wl_entry->value;
126                 worklist_entry_t *new_entry
127                         = obstack_alloc(&obst, sizeof(new_entry[0]));
128
129                 if(is_Phi(value) && get_nodes_block(value) == succ_block) {
130                         value = get_Phi_pred(value, succ_pos);
131                 }
132
133                 new_entry->value        = value;
134                 new_entry->timestep     = wl_entry->timestep;
135                 if(reload_point != NULL) {
136                         new_entry->reload_point = reload_point;
137                 } else {
138                         new_entry->reload_point = wl_entry->reload_point;
139                 }
140
141                 if(irn_not_visited(value)) {
142                         list_add_tail(&new_entry->head, &new_worklist->live_values);
143                         ++n_live_values;
144
145                         mark_irn_visited(value);
146                         set_irn_link(value, new_entry);
147                 } else {
148                         /* the only way we might visit a value again, is when we get it as
149                          * phi predecessor */
150                         assert(is_Phi(wl_entry->value));
151                 }
152         }
153         new_worklist->n_live_values = n_live_values;
154
155         return new_worklist;
156 }
157
158 #ifdef DEBUG_libfirm
159 static void print_worklist(const worklist_t *worklist, int level)
160 {
161         struct list_head *entry;
162
163         DB((dbg, level, "%d values (TS %u): ", worklist->n_live_values,
164             worklist->current_timestep));
165         list_for_each(entry, &worklist->live_values) {
166                 worklist_entry_t *wl_entry
167                         = list_entry(entry, worklist_entry_t, head);
168
169                 DB((dbg, level, "%+F ", wl_entry->value));
170         }
171 }
172 #endif
173
174 static void place_reload(worklist_entry_t *entry)
175 {
176         if(tentative_mode)
177                 return;
178         DB((dbg, LEVEL_1, "reload of %+F before %+F", entry->value,
179             entry->reload_point));
180         be_add_reload(senv, entry->value, entry->reload_point, cls, 1);
181 }
182
183 static void spill_non_live_nodes(const worklist_t *worklist)
184 {
185         struct list_head *entry;
186
187         list_for_each(entry, &worklist->live_values) {
188                 worklist_entry_t *wl_entry
189                         = list_entry(entry, worklist_entry_t, head);
190                 ir_node          *value = wl_entry->value;
191
192                 if(irn_visited(value))
193                         continue;
194
195                 place_reload(wl_entry);
196         }
197 }
198
199 /**
200  * makes sure the worklist contains not more than n_regs - room_needed entries
201  */
202 static void make_room(worklist_t *worklist, size_t room_needed)
203 {
204         int spills_needed = worklist->n_live_values + room_needed - n_regs;
205         if(spills_needed > 0) {
206                 int               i     = spills_needed;
207                 struct list_head *entry = worklist->live_values.next;
208                 for(i = spills_needed; i > 0; --i) {
209                         struct list_head *next = entry->next;
210                         worklist_entry_t *wl_entry
211                                 = list_entry(entry, worklist_entry_t, head);
212                         assert(irn_visited(wl_entry->value));
213                         mark_irn_not_visited(wl_entry->value);
214                         place_reload(wl_entry);
215                         list_del(entry);
216
217                         entry = next;
218                 }
219                 worklist->n_live_values -= spills_needed;
220         }
221 }
222
223 /**
224  * a value was used, so bring it to the back of the worklist (which might
225  * result in a spill of another value).
226  */
227 static void val_used(worklist_t *worklist, ir_node *value, ir_node *sched_point)
228 {
229         /* is the node in the worklist already? */
230         worklist_entry_t *entry = get_irn_link(value);
231         if(irn_visited(value)) {
232                 assert(entry != NULL);
233
234                 assert(irn_visited(value));
235                 list_del(&entry->head);
236         } else {
237                 if(entry == NULL) {
238                         entry        = obstack_alloc(&obst, sizeof(entry[0]));
239                         entry->value = value;
240                         set_irn_link(value, entry);
241                 }
242
243                 ++worklist->n_live_values;
244                 mark_irn_visited(value);
245         }
246
247         entry->timestep     = worklist->current_timestep;
248         entry->reload_point = sched_point;
249         list_add_tail(&entry->head, &worklist->live_values);
250 }
251
252 static void worklist_remove(worklist_t *worklist, ir_node *value)
253 {
254         worklist_entry_t *entry = get_irn_link(value);
255         assert(entry != NULL);
256         list_del(&entry->head);
257         --worklist->n_live_values;
258
259         assert(irn_visited(value));
260         mark_irn_not_visited(value);
261 }
262
/**
 * Walk the schedule of @p block backwards, simulating the MIN algorithm
 * starting from the end-of-block workset @p worklist. Values evicted from
 * the workset get reloads placed (via make_room -> place_reload) and phis
 * that are not live at the block start get spilled. On return @p worklist
 * holds the workset at the beginning of the block.
 */
static void do_spilling(ir_node *block, worklist_t *worklist)
{
	ir_node *node;

	assert(worklist != NULL);

#ifdef DEBUG_libfirm
	DB((dbg, LEVEL_1, "worklist at end of %+F:", block));
	print_worklist(worklist, LEVEL_1);
	DB((dbg, LEVEL_1, "\n"));
#endif

	sched_foreach_reverse(block, node) {
		int    i, arity;
		size_t n_defs = 0;

		DB((dbg, LEVEL_2, "\t%+F... ", node));

		if(is_Phi(node)) {
			ir_node *node2;
			/* TODO: if we have some free registers, then we could decide to
			 * not spill some phis (but not for phis where at least 1 input is
			 * themselves) */

			/* we have to spill all phis that are not live */
			sched_foreach_reverse_from(node, node2) {
				assert(is_Phi(node2));

				/* live phis and phis outside our register class need no
				 * spill */
				if(irn_visited(node2))
					continue;
				if(!arch_irn_consider_in_reg_alloc(arch_env, cls, node2))
					continue;

				/* in tentative mode we only simulate, nothing is placed */
				if(!tentative_mode)
					be_spill_phi(senv, node2);
			}
			/* phis sit at the block start; the backward walk is finished */
			break;
		}

		/* remove values defined by this instruction from the workset. Values
		 * defined but not in the workset need free registers */
		if(get_irn_mode(node) == mode_T) {
			/* mode_T node: the real results are the Proj users */
			const ir_edge_t *edge;

			foreach_out_edge(node, edge) {
				ir_node *proj = get_edge_src_irn(edge);
				if(!arch_irn_consider_in_reg_alloc(arch_env, cls, proj))
					continue;
				if(irn_visited(proj)) {
					worklist_remove(worklist, proj);
				} else {
					/* def not in the workset: it still occupies a register */
					++n_defs;
				}
			}
		} else if(arch_irn_consider_in_reg_alloc(arch_env, cls, node)) {
			if(irn_visited(node)) {
				worklist_remove(worklist, node);
			} else {
				n_defs = 1;
			}
		}

		/* make sure we have enough free registers for the spills */
		make_room(worklist, n_defs);

		/* put all values used by the instruction into the workset */
		arity = get_irn_arity(node);
		for(i = 0; i < arity; ++i) {
			ir_node *use = get_irn_n(node, i);

			if(!arch_irn_consider_in_reg_alloc(arch_env, cls, use))
				continue;

			val_used(worklist, use, node);
		}

		/* we might have too many values in the worklist now and need to spill
		 * some */
		make_room(worklist, 0);

		++worklist->current_timestep;

#ifdef DEBUG_libfirm
		print_worklist(worklist, LEVEL_2);
		DB((dbg, LEVEL_2, "\n"));
#endif
	}

#ifdef DEBUG_libfirm
	DB((dbg, LEVEL_1, "worklist at begin of %+F:", block));
	print_worklist(worklist, LEVEL_1);
	DB((dbg, LEVEL_1, "\n"));
#endif
}
357
/**
 * Block walker: build the end-of-block workset for @p block by duplicating
 * the workset of the already-processed successor with the highest execution
 * frequency, run the spilling simulation over the block, and store the
 * resulting start-workset in the block's link field for the predecessors.
 */
static void process_block(ir_node *block, void *env)
{
	int              n_preds;
	const ir_edge_t *edge;
	worklist_t      *worklist        = NULL;
	double           best_execfreq   = -1;
	ir_node         *best_succ_block = NULL;
	int              best_pos        = -1;

	(void) env;
	DB((dbg, LEVEL_1, "Processing %+F\n", block));

	/* construct worklist: pick the workset of the most frequently executed
	 * successor that has already been processed */
	foreach_block_succ(block, edge) {
		ir_node *succ_block = get_edge_src_irn(edge);
		double   execfreq   = get_block_execfreq(exec_freq, succ_block);

		if(execfreq > best_execfreq) {
			/* only processed blocks have a worklist in their link field */
			worklist_t *succ_worklist = get_irn_link(succ_block);
			if(succ_worklist != NULL) {
				best_execfreq   = execfreq;
				worklist        = succ_worklist;
				best_succ_block = succ_block;
				best_pos        = get_edge_src_pos(edge);
			}
		}
	}
	if(worklist == NULL) {
		/* at least 1 successor block must have been processed unless it was
		 * the end block */
		assert(tentative_mode || block == get_irg_end_block(get_irn_irg(block)));

		worklist = new_worklist();
	} else {
		worklist = duplicate_activate_worklist(worklist, block, best_succ_block,
		                                       best_pos);

		/* now we could have live values in the succ worklists that are not
		 * live anymore in the worklist we picked. We need reloads for them. */
		if(!tentative_mode) {
			foreach_block_succ(block, edge) {
				ir_node      *succ_block    = get_edge_src_irn(edge);
				worklist_t   *succ_worklist = get_irn_link(succ_block);

				spill_non_live_nodes(succ_worklist);
			}
		}
	}

	do_spilling(block, worklist);
	deactivate_worklist(worklist);

	/* publish the start-workset for our predecessors */
	set_irn_link(block, worklist);

	/* we shouldn't have any live values left at the start block */
	n_preds = get_Block_n_cfgpreds(block);
	assert(n_preds != 0 || worklist->n_live_values == 0);
}
416
/**
 * Entry point of the belady-v3 spiller for one register class: a first,
 * tentative backward pass over the CFG computes initial worksets, a second
 * pass actually records spills and reloads, which are then materialized.
 */
static void be_spill_belady3(be_irg_t *birg, const arch_register_class_t *ncls)
{
	ir_graph *irg = be_get_birg_irg(birg);

	cls       = ncls;
	/* registers usable for values of this class (ignore-regs excluded) */
	n_regs    = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);

	/* nothing to do when no register of the class is freely usable */
	if(n_regs == 0)
		return;

	arch_env       = be_get_birg_arch_env(birg);
	exec_freq      = be_get_birg_exec_freq(birg);

	/* reserve the link field (worklists/entries) and the visited flag
	 * (membership in the active worklist) */
	be_clear_links(irg);
	set_using_irn_link(irg);
	set_using_irn_visited(irg);
	inc_irg_visited(irg);

	obstack_init(&obst);
	senv = be_new_spill_env(birg);

	/* do a post-order walk over the CFG to make sure we have a maximum number
	 * of preds processed before entering a block */
	tentative_mode = 1;
	irg_block_edges_walk(get_irg_start_block(irg), NULL, process_block, NULL);

	/* second pass: this time spills and reloads are recorded for real */
	tentative_mode = 0;
	irg_block_edges_walk(get_irg_start_block(irg), NULL, process_block, NULL);

	clear_using_irn_link(irg);
	clear_using_irn_visited(irg);

	be_insert_spills_reloads(senv);

	obstack_free(&obst, NULL);

	/* clean up */
	be_delete_spill_env(senv);
}
456
457 void be_init_spillbelady3(void)
458 {
459         static be_spiller_t belady3_spiller = {
460                 be_spill_belady3
461         };
462
463         be_register_spiller("belady3", &belady3_spiller);
464         FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady3");
465 }
466
467 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady3);