ea43fe9ebb332a2fc178383c0dd7e267576e7a53
[libfirm] / ir / be / beblocksched.c
1 /*
2  * Author:      Matthias Braun, Christoph Mallon
3  * Date:                27.09.2006
4  * Copyright:   (c) Universitaet Karlsruhe
5  * License:     This file is protected by GPL -  GNU GENERAL PUBLIC LICENSE.
6  * CVS-Id:      $Id$
7  */
8 #ifdef HAVE_CONFIG_H
9 #include "config.h"
10 #endif /* HAVE_CONFIG_H */
11
12 #include "beblocksched.h"
13
14 #include <stdlib.h>
15
16 #include "array.h"
17 #include "pdeq.h"
18
19 #include "iredges.h"
20 #include "irgwalk.h"
21 #include "irgraph_t.h"
22 #include "irloop.h"
23 #include "irprintf.h"
24 #include "irdump_t.h"
25 #include "beirgmod.h"
26
27 #ifdef WITH_LIBCORE
28 #include <libcore/lc_opts.h>
29 #include <libcore/lc_opts_enum.h>
30 #include <libcore/lc_timing.h>
31 #endif /* WITH_LIBCORE */
32
33 #ifdef WITH_ILP
34 #include <lpp/lpp.h>
35 #include <lpp/lpp_net.h>
36 #endif /* WITH_ILP */
37
38 typedef enum _blocksched_algos_t {
39         BLOCKSCHED_NAIV, BLOCKSCHED_EXTBB, BLOCKSCHED_GREEDY, BLOCKSCHED_ILP
40 } blocksched_algos_t;
41
42 static int algo = BLOCKSCHED_GREEDY;
43
44 static const lc_opt_enum_int_items_t blockschedalgo_items[] = {
45         { "naiv",       BLOCKSCHED_NAIV },
46         { "extbb",      BLOCKSCHED_EXTBB },
47         { "greedy", BLOCKSCHED_GREEDY },
48 #ifdef WITH_ILP
49         { "ilp",    BLOCKSCHED_ILP },
50 #endif /* WITH_ILP */
51         { NULL,     0 }
52 };
53
54 static lc_opt_enum_int_var_t algo_var = {
55         &algo, blockschedalgo_items
56 };
57
58 static const lc_opt_table_entry_t be_blocksched_options[] = {
59         LC_OPT_ENT_ENUM_INT ("algo", "the block scheduling algorithm (naiv, extbb, greedy, ilp)", &algo_var),
60         { NULL }
61 };
62
63 /*
64  *   ____                   _
65  *  / ___|_ __ ___  ___  __| |_   _
66  * | |  _| '__/ _ \/ _ \/ _` | | | |
67  * | |_| | | |  __/  __/ (_| | |_| |
68  *  \____|_|  \___|\___|\__,_|\__, |
69  *                            |___/
70  */
71
72 typedef struct _blocksched_entry_t {
73         ir_node *block;
74         struct _blocksched_entry_t *next;
75         struct _blocksched_entry_t *prev;
76 } blocksched_entry_t;
77
78 typedef struct _edge_t {
79         ir_node *block;
80         int pos;
81         double execfreq;
82         int highest_execfreq;           /**< flag that indicates wether this edge is the edge with the highest
83                                                                      execfreq pointing away from this block */
84 } edge_t;
85
86 typedef struct _blocksched_env_t {
87         ir_graph *irg;
88         struct obstack *obst;
89         ir_exec_freq *execfreqs;
90         edge_t *edges;
91         pdeq *worklist;
92         int blockcount;
93 } blocksched_env_t;
94
95 static void collect_egde_frequency(ir_node *block, void *data)
96 {
97         blocksched_env_t *env = data;
98         ir_graph *irg = env->irg;
99         ir_node *startblock = get_irg_start_block(irg);
100         int arity;
101         edge_t edge;
102         blocksched_entry_t *entry;
103
104         entry = obstack_alloc(env->obst, sizeof(entry[0]));
105         entry->block = block;
106         entry->next = NULL;
107         entry->prev = NULL;
108         set_irn_link(block, entry);
109
110         if(block == startblock)
111                 return;
112
113         arity = get_irn_arity(block);
114         if(arity == 1) {
115                 edge.block = block;
116                 edge.pos = 0;
117                 edge.execfreq = get_block_execfreq(env->execfreqs, block);
118                 edge.highest_execfreq = 1;
119                 ARR_APP1(edge_t, env->edges, edge);
120         } else {
121                 int i;
122                 double highest_execfreq = -1;
123                 int highest_edge_num;
124
125                 edge.block = block;
126                 for(i = 0; i < arity; ++i) {
127                         double execfreq;
128
129                         ir_node *pred_block = get_Block_cfgpred_block(block, i);
130                         execfreq = get_block_execfreq(env->execfreqs, pred_block);
131
132                         edge.pos = i;
133                         edge.execfreq = execfreq;
134                         edge.highest_execfreq = 0;
135                         ARR_APP1(edge_t, env->edges, edge);
136                         if(execfreq > highest_execfreq) {
137                                 highest_execfreq = execfreq;
138                                 highest_edge_num = ARR_LEN(env->edges) - 1;
139                         }
140                 }
141
142                 env->edges[highest_edge_num].highest_execfreq = 1;
143         }
144 }
145
146 static int cmp_edges(const void *d1, const void *d2)
147 {
148         const edge_t *e1 = d1;
149         const edge_t *e2 = d2;
150         if (e2->execfreq > e1->execfreq) return 1;
151         if (e2->execfreq < e1->execfreq) return -1;
152         return 0;
153 }
154
155 static void coalesce_blocks(blocksched_env_t *env)
156 {
157         int i;
158         int edge_count = ARR_LEN(env->edges);
159
160         // run1: only look at jumps
161         for(i = 0; i < edge_count; ++i) {
162                 const edge_t *edge = & env->edges[i];
163                 ir_node *block = edge->block;
164                 ir_node *pred_block;
165                 blocksched_entry_t *entry, *pred_entry;
166
167                 // the block might have been removed already...
168                 if(is_Bad(get_Block_cfgpred(block, 0)))
169                         continue;
170
171                 if(!edge->highest_execfreq)
172                         continue;
173
174                 pred_block = get_Block_cfgpred_block(block, edge->pos);
175                 entry = get_irn_link(block);
176                 pred_entry = get_irn_link(pred_block);
177
178                 if(pred_entry->next != NULL || entry->prev != NULL)
179                         continue;
180                 // only coalesce jumps
181                 if(get_block_succ_next(pred_block, get_block_succ_first(pred_block)) != NULL)
182                         continue;
183
184                 // schedule the 2 blocks behind each other
185                 ir_fprintf(stderr, "Coalesce (Jump) %+F -> %+F (%.3g)\n",
186                            pred_entry->block, entry->block, edge->execfreq);
187                 pred_entry->next = entry;
188                 entry->prev = pred_entry;
189         }
190
191         // run2: remaining edges
192         for(i = 0; i < edge_count; ++i) {
193                 const edge_t *edge = & env->edges[i];
194                 ir_node *block = edge->block;
195                 ir_node *pred_block;
196                 blocksched_entry_t *entry, *pred_entry;
197
198                 // the block might have been removed already...
199                 if(is_Bad(get_Block_cfgpred(block, 0)))
200                         continue;
201
202                 pred_block = get_Block_cfgpred_block(block, edge->pos);
203                 entry = get_irn_link(block);
204                 pred_entry = get_irn_link(pred_block);
205
206                 if(pred_entry->next != NULL || entry->prev != NULL)
207                         continue;
208
209                 // schedule the 2 blocks behind each other
210                 ir_fprintf(stderr, "Coalesce (CondJump) %+F -> %+F (%.3g)\n",
211                            pred_entry->block, entry->block, edge->execfreq);
212                 pred_entry->next = entry;
213                 entry->prev = pred_entry;
214         }
215 }
216
217 static void pick_block_successor(blocksched_entry_t *entry, blocksched_env_t *env)
218 {
219         ir_node *block = entry->block;
220         blocksched_entry_t *succ_entry;
221         const ir_edge_t *edge;
222         double best_succ_execfreq;
223         ir_node *succ = NULL;
224
225         if(irn_visited(block))
226                 return;
227         env->blockcount++;
228         mark_irn_visited(block);
229
230         ir_fprintf(stderr, "Pick succ of %+F\n", block);
231
232         // put all successors into the worklist
233         foreach_block_succ(block, edge) {
234                 ir_node *succ_block = get_edge_src_irn(edge);
235
236                 if(irn_visited(succ_block))
237                         continue;
238
239                 // we only need to put the first of a series of already connected
240                 // blocks into the worklist
241                 succ_entry = get_irn_link(succ_block);
242                 while(succ_entry->prev != NULL) {
243                         // break cycles...
244                         if(succ_entry->prev->block == succ_block) {
245                                 succ_entry->prev->next = NULL;
246                                 succ_entry->prev = NULL;
247                                 break;
248                         }
249                         succ_entry = succ_entry->prev;
250                 };
251
252                 if(irn_visited(succ_entry->block))
253                         continue;
254
255                 ir_fprintf(stderr, "Put %+F into worklist\n", succ_entry->block);
256                 pdeq_putr(env->worklist, succ_entry->block);
257         }
258
259         if(entry->next != NULL) {
260                 pick_block_successor(entry->next, env);
261                 return;
262         }
263
264         fprintf(stderr, "deciding...\n");
265         best_succ_execfreq = -1;
266         /* no successor yet: pick the successor block with the highest execution
267          * frequency which has no predecessor yet
268          */
269         foreach_block_succ(block, edge) {
270                 ir_node *succ_block = get_edge_src_irn(edge);
271                 double execfreq;
272
273                 if(irn_visited(succ_block))
274                         continue;
275
276                 succ_entry = get_irn_link(succ_block);
277                 if(succ_entry->prev != NULL)
278                         continue;
279
280                 execfreq = get_block_execfreq(env->execfreqs, succ_block);
281                 if(execfreq > best_succ_execfreq) {
282                         best_succ_execfreq = execfreq;
283                         succ = succ_block;
284                 }
285         }
286
287         if(succ == NULL) {
288                 fprintf(stderr, "pick from worklist\n");
289
290                 do {
291                         if(pdeq_empty(env->worklist)) {
292                                 fprintf(stderr, "worklist empty\n");
293                                 return;
294                         }
295                         succ = pdeq_getl(env->worklist);
296                 } while(irn_visited(succ));
297         }
298
299         succ_entry = get_irn_link(succ);
300         entry->next = succ_entry;
301         succ_entry->prev = entry;
302
303         pick_block_successor(succ_entry, env);
304 }
305
306 static blocksched_entry_t *finish_block_schedule(blocksched_env_t *env)
307 {
308         ir_graph *irg = env->irg;
309         ir_node *startblock = get_irg_start_block(irg);
310         blocksched_entry_t *entry = get_irn_link(startblock);
311
312         inc_irg_visited(irg);
313
314         env->worklist = new_pdeq();
315         pick_block_successor(entry, env);
316         assert(pdeq_empty(env->worklist));
317         del_pdeq(env->worklist);
318
319         return entry;
320 }
321
322 static ir_node **create_blocksched_array(blocksched_entry_t *first, int count,
323                                          struct obstack* obst) {
324         int i = 0;
325         ir_node **block_list;
326         blocksched_entry_t *entry;
327
328         block_list = NEW_ARR_D(ir_node *, obst, count);
329         fprintf(stderr, "Blockschedule:\n");
330         for(entry = first; entry != NULL; entry = entry->next) {
331                 assert(i < count);
332                 block_list[i++] = entry->block;
333                 ir_fprintf(stderr, "\t%+F\n", entry->block);
334         }
335         assert(i == count);
336
337         return block_list;
338 }
339
340 static ir_node **create_block_schedule_greedy(ir_graph *irg, ir_exec_freq *execfreqs)
341 {
342         blocksched_env_t env;
343         struct obstack obst;
344         blocksched_entry_t *start_entry;
345         ir_node **block_list;
346
347         obstack_init(&obst);
348
349         env.irg = irg;
350         env.obst = &obst;
351         env.execfreqs = execfreqs;
352         env.edges = NEW_ARR_F(edge_t, 0);
353         env.worklist = NULL;
354         env.blockcount = 0;
355
356         // collect edge execution frequencies
357         irg_block_walk_graph(irg, collect_egde_frequency, NULL, &env);
358
359         // sort interblock edges by execution frequency
360         qsort(env.edges, ARR_LEN(env.edges), sizeof(env.edges[0]), cmp_edges);
361
362         be_remove_empty_blocks(irg);
363
364         if(algo != BLOCKSCHED_NAIV)
365                 coalesce_blocks(&env);
366
367         start_entry = finish_block_schedule(&env);
368
369         block_list = create_blocksched_array(start_entry, env.blockcount, get_irg_obstack(irg));
370
371         DEL_ARR_F(env.edges);
372         obstack_free(&obst, NULL);
373
374         return block_list;
375 }
376
377 /*
378  *  ___ _     ____
379  * |_ _| |   |  _ \
380  *  | || |   | |_) |
381  *  | || |___|  __/
382  * |___|_____|_|
383  *
384  */
385
386 #ifdef WITH_ILP
387 typedef struct _ilp_edge_t {
388         ir_node *block;
389         int pos;
390         int ilpvar;
391 } ilp_edge_t;
392
393 typedef struct _blocksched_ilp_env_t {
394         blocksched_env_t env;
395         ilp_edge_t *ilpedges;
396         lpp_t *lpp;
397 } blocksched_ilp_env_t;
398
399 typedef struct _blocksched_ilp_entry_t {
400         ir_node *block;
401         struct _blocksched_entry_t *next;
402         struct _blocksched_entry_t *prev;
403
404         int out_cst;
405 } blocksched_ilp_entry_t;
406
407 static int add_ilp_edge(ir_node *block, int pos, double execfreq, blocksched_ilp_env_t *env)
408 {
409         char name[64];
410         ilp_edge_t edge;
411         int edgeidx = ARR_LEN(env->ilpedges);
412
413         snprintf(name, sizeof(name), "edge%d", edgeidx);
414
415         edge.block = block;
416         edge.pos = pos;
417         edge.ilpvar = lpp_add_var_default(env->lpp, name, lpp_binary, execfreq, 1.0);
418
419         ARR_APP1(ilp_edge_t, env->ilpedges, edge);
420         return edgeidx;
421 }
422
423 static void collect_egde_frequency_ilp(ir_node *block, void *data)
424 {
425         blocksched_ilp_env_t *env = data;
426         ir_graph *irg = env->env.irg;
427         ir_node *startblock = get_irg_start_block(irg);
428         int arity;
429         blocksched_ilp_entry_t *entry;
430         lpp_cst_t cst;
431         char name[64];
432         int out_count;
433
434         snprintf(name, sizeof(name), "block_out_constr_%ld", get_irn_node_nr(block));
435         out_count = get_irn_n_edges_kind(block, EDGE_KIND_BLOCK);
436
437         entry = obstack_alloc(env->env.obst, sizeof(entry[0]));
438         entry->block = block;
439         entry->next = NULL;
440         entry->prev = NULL;
441         entry->out_cst = lpp_add_cst_uniq(env->lpp, name, lpp_greater, out_count - 1);
442         set_irn_link(block, entry);
443
444         if(block == startblock)
445                 return;
446
447         arity = get_irn_arity(block);
448         if(arity == 1) {
449                 double execfreq = get_block_execfreq(env->env.execfreqs, block);
450                 add_ilp_edge(block, 0, execfreq, env);
451         } else {
452                 int i;
453                 int *edgenums = alloca(sizeof(edgenums[0]) * arity);
454
455                 snprintf(name, sizeof(name), "block_in_constr_%ld", get_irn_node_nr(block));
456                 cst = lpp_add_cst_uniq(env->lpp, name, lpp_greater, arity - 1);
457
458                 for(i = 0; i < arity; ++i) {
459                         double execfreq;
460                         int edgenum;
461                         ilp_edge_t *edge;
462
463                         ir_node *pred_block = get_Block_cfgpred_block(block, i);
464                         execfreq = get_block_execfreq(env->env.execfreqs, pred_block);
465
466                         edgenum = add_ilp_edge(block, i, execfreq, env);
467                         edge = & env->ilpedges[edgenum];
468                         lpp_set_factor_fast(env->lpp, cst, edge->ilpvar, 1.0);
469                 }
470         }
471 }
472
473
474 static void coalesce_blocks_ilp(blocksched_ilp_env_t *env)
475 {
476         int i;
477         int edge_count = ARR_LEN(env->ilpedges);
478         FILE *f;
479         char fname[256];
480
481         /* complete out constraints */
482         for(i = 0; i < edge_count; ++i) {
483                 const ilp_edge_t *edge = & env->ilpedges[i];
484                 ir_node *block = edge->block;
485                 ir_node *pred;
486                 blocksched_ilp_entry_t *entry;
487
488                 // the block might have been removed already...
489                 if(is_Bad(get_Block_cfgpred(block, 0)))
490                         continue;
491
492                 pred = get_Block_cfgpred_block(block, edge->pos);
493                 entry = get_irn_link(pred);
494
495                 ir_printf("Adding out cst to %+F from %+F,%d\n",
496                                   pred, block, edge->pos);
497                 lpp_set_factor_fast(env->lpp, entry->out_cst, edge->ilpvar, 1.0);
498         }
499
500         lpp_dump(env->lpp, "lpp.out");
501         snprintf(fname, sizeof(fname), "lpp_%s.plain", get_irg_dump_name(env->env.irg));
502         f = fopen(fname, "w");
503         lpp_dump_plain(env->lpp, f);
504         fclose(f);
505         //lpp_solve_net(env->lpp, main_env->options->ilp_server, main_env->options->ilp_solver);
506         lpp_solve_net(env->lpp, "i44pc52", "cplex");
507         assert(lpp_is_sol_valid(env->lpp));
508
509         /* Apply results to edges */
510         for(i = 0; i < edge_count; ++i) {
511                 const ilp_edge_t *edge = & env->ilpedges[i];
512                 ir_node *block = edge->block;
513                 ir_node *pred;
514                 int is_jump;
515                 blocksched_entry_t *entry;
516                 blocksched_entry_t *pred_entry;
517
518                 // the block might have been removed already...
519                 if(is_Bad(get_Block_cfgpred(block, 0)))
520                         continue;
521
522                 is_jump = lpp_get_var_sol(env->lpp, edge->ilpvar);
523                 if(is_jump)
524                         continue;
525
526                 pred = get_Block_cfgpred_block(block, edge->pos);
527                 entry = get_irn_link(block);
528                 pred_entry = get_irn_link(pred);
529
530                 assert(entry->prev == NULL && pred_entry->next == NULL);
531                 entry->prev = pred_entry;
532                 pred_entry->next = entry;
533         }
534 }
535
536 static ir_node **create_block_schedule_ilp(ir_graph *irg, ir_exec_freq *execfreqs)
537 {
538         blocksched_ilp_env_t env;
539         struct obstack obst;
540         blocksched_entry_t *start_entry;
541         ir_node **block_list;
542
543         obstack_init(&obst);
544
545         env.env.irg = irg;
546         env.env.obst = &obst;
547         env.env.execfreqs = execfreqs;
548         env.env.worklist = NULL;
549         env.env.blockcount = 0;
550         env.ilpedges = NEW_ARR_F(ilp_edge_t, 0);
551
552         env.lpp = new_lpp("blockschedule", lpp_minimize);
553         lpp_set_time_limit(env.lpp, 20);
554         lpp_set_log(env.lpp, stdout);
555
556         irg_block_walk_graph(irg, collect_egde_frequency_ilp, NULL, &env);
557
558         be_remove_empty_blocks(irg);
559
560         coalesce_blocks_ilp(&env);
561
562         start_entry = finish_block_schedule(&env.env);
563
564         block_list = create_blocksched_array(start_entry, env.env.blockcount, get_irg_obstack(irg));
565
566         DEL_ARR_F(env.ilpedges);
567         free_lpp(env.lpp);
568         obstack_free(&obst, NULL);
569
570         return block_list;
571 }
572 #endif /* WITH_ILP */
573
574 /*
575  *  _____      _   ____  ____
576  * | ____|_  _| |_| __ )| __ )
577  * |  _| \ \/ / __|  _ \|  _ \
578  * | |___ >  <| |_| |_) | |_) |
579  * |_____/_/\_\\__|____/|____/
580  *
581  */
582
583 /** A simple forward single linked list. */
584 typedef struct {
585         ir_node *start;   /**< start of the list */
586         ir_node *end;     /**< last block in the list */
587         unsigned n_blks;  /**< number of blocks in the list */
588 } anchor;
589
590 static void add_block(anchor *list, ir_node *block) {
591         if(list->start == NULL) {
592                 list->start = block;
593                 list->end = block;
594         } else {
595                 set_irn_link(list->end, block);
596                 list->end = block;
597         }
598
599         list->n_blks++;
600 }
601
602 static void create_block_list(ir_node *leader_block, anchor *list) {
603         int i;
604         ir_node *block = NULL;
605         const ir_edge_t *edge;
606
607         ir_extblk *extbb = get_Block_extbb(leader_block);
608         if(extbb_visited(extbb))
609                 return;
610         mark_extbb_visited(extbb);
611
612         for(i = 0; i < get_extbb_n_blocks(extbb); ++i) {
613                 block = get_extbb_block(extbb, i);
614                 add_block(list, block);
615         }
616
617         assert(block != NULL);
618
619         // pick successor extbbs
620         foreach_block_succ(block, edge) {
621                 ir_node *succ = get_edge_src_irn(edge);
622
623                 create_block_list(succ, list);
624         }
625
626         for(i = 0; i < get_extbb_n_blocks(extbb) - 1; ++i) {
627                 block = get_extbb_block(extbb, i);
628                 foreach_block_succ(block, edge) {
629                         ir_node *succ = get_edge_src_irn(edge);
630
631                         create_block_list(succ, list);
632                 }
633         }
634 }
635
636 void compute_extbb_execfreqs(ir_graph *irg, ir_exec_freq *execfreqs);
637
638 /*
639  * Calculates a block schedule. The schedule is stored as a linked
640  * list starting at the start_block of the irg.
641  */
642 static ir_node **create_extbb_block_schedule(ir_graph *irg, ir_exec_freq *execfreqs)
643 {
644         anchor list;
645         ir_node **blk_list, *b, *n;
646         unsigned i;
647
648         /* schedule extended basic blocks */
649         compute_extbb_execfreqs(irg, execfreqs);
650         //compute_extbb(irg);
651
652         list.start  = NULL;
653         list.end    = NULL;
654         list.n_blks = 0;
655         inc_irg_block_visited(irg);
656         create_block_list(get_irg_start_block(irg), &list);
657
658         /** create an array, so we can go forward and backward */
659         blk_list = NEW_ARR_D(ir_node *, irg->obst,list.n_blks);
660
661         for (i = 0, b = list.start; b; b = n, ++i) {
662                 n = get_irn_link(b);
663                 blk_list[i] = b;
664         }
665
666         return blk_list;
667 }
668
669 /*
670  *  __  __       _
671  * |  \/  | __ _(_)_ __
672  * | |\/| |/ _` | | '_ \
673  * | |  | | (_| | | | | |
674  * |_|  |_|\__,_|_|_| |_|
675  *
676  */
677
678 #ifdef WITH_LIBCORE
679 void be_block_schedule_register_options(lc_opt_entry_t *grp)
680 {
681         static int run_once = 0;
682         lc_opt_entry_t *blocksched_grp;
683
684         if(run_once)
685                 return;
686         run_once = 1;
687         blocksched_grp = lc_opt_get_grp(grp, "blocksched");
688
689         lc_opt_add_table(blocksched_grp, be_blocksched_options);
690 }
691 #endif /* WITH_LIBCORE */
692
693 ir_node **be_create_block_schedule(ir_graph *irg, ir_exec_freq *execfreqs)
694 {
695         switch(algo) {
696         case BLOCKSCHED_GREEDY:
697         case BLOCKSCHED_NAIV:
698                 return create_block_schedule_greedy(irg, execfreqs);
699         case BLOCKSCHED_EXTBB:
700                 return create_extbb_block_schedule(irg, execfreqs);
701 #ifdef WITH_ILP
702         case BLOCKSCHED_ILP:
703                 return create_block_schedule_ilp(irg, execfreqs);
704 #endif /* WITH_ILP */
705         }
706
707         assert(0 && "unknown blocksched algo");
708         return NULL;
709 }