- Added 2 new blockschedulers, a greedy algorithm and an "optimal" ILP that
[libfirm] / ir / be / beblocksched.c
1 /*
2  * Author:      Matthias Braun, Christoph Mallon
3  * Date:                27.09.2006
4  * Copyright:   (c) Universitaet Karlsruhe
5  * License:     This file is protected by GPL -  GNU GENERAL PUBLIC LICENSE.
6  *
7  */
8 #ifdef HAVE_CONFIG_H
9 #include "config.h"
10 #endif
11
12 #include "beblocksched.h"
13
14 #include <stdlib.h>
15
16 #include "array.h"
17 #include "pdeq.h"
18
19 #include "iredges.h"
20 #include "irgwalk.h"
21 #include "irgraph_t.h"
22 #include "irloop.h"
23 #include "irprintf.h"
24 #include "irdump_t.h"
25 #include "beirgmod.h"
26
27 #ifdef WITH_LIBCORE
28 #include <libcore/lc_opts.h>
29 #include <libcore/lc_opts_enum.h>
30 #include <libcore/lc_timing.h>
31 #endif
32
33 #ifdef WITH_ILP
34 #include <lpp/lpp.h>
35 #include <lpp/lpp_net.h>
36 #endif
37
38 typedef enum _blocksched_algos_t {
39         BLOCKSCHED_NAIV, BLOCKSCHED_EXTBB, BLOCKSCHED_GREEDY, BLOCKSCHED_ILP
40 } blocksched_algos_t;
41
42 static int algo = BLOCKSCHED_GREEDY;
43
44 static const lc_opt_enum_int_items_t blockschedalgo_items[] = {
45         { "naiv",       BLOCKSCHED_NAIV },
46         { "extbb",      BLOCKSCHED_EXTBB },
47         { "greedy", BLOCKSCHED_GREEDY },
48 #ifdef WITH_ILP
49         { "ilp",    BLOCKSCHED_ILP },
50 #endif
51         { NULL,     0 }
52 };
53
54 static lc_opt_enum_int_var_t algo_var = {
55         &algo, blockschedalgo_items
56 };
57
58 static const lc_opt_table_entry_t be_blocksched_options[] = {
59         LC_OPT_ENT_ENUM_INT ("algo", "the block scheduling algorithm", &algo_var),
60         { NULL }
61 };
62
63 /*
64  *   ____                   _
65  *  / ___|_ __ ___  ___  __| |_   _
66  * | |  _| '__/ _ \/ _ \/ _` | | | |
67  * | |_| | | |  __/  __/ (_| | |_| |
68  *  \____|_|  \___|\___|\__,_|\__, |
69  *                            |___/
70  */
71
72 typedef struct _blocksched_entry_t {
73         ir_node *block;
74         struct _blocksched_entry_t *next;
75         struct _blocksched_entry_t *prev;
76 } blocksched_entry_t;
77
78 typedef struct _edge_t {
79         ir_node *block;
80         int pos;
81         double execfreq;
82         int highest_execfreq;           /**< flag that indicates wether this edge is the edge with the highest
83                                                                      execfreq pointing away from this block */
84 } edge_t;
85
86 typedef struct _blocksched_env_t {
87         ir_graph *irg;
88         struct obstack *obst;
89         ir_exec_freq *execfreqs;
90         edge_t *edges;
91         pdeq *worklist;
92         int blockcount;
93 } blocksched_env_t;
94
95 static void collect_egde_frequency(ir_node *block, void *data)
96 {
97         blocksched_env_t *env = data;
98         ir_graph *irg = env->irg;
99         ir_node *startblock = get_irg_start_block(irg);
100         int arity;
101         edge_t edge;
102         blocksched_entry_t *entry;
103
104         entry = obstack_alloc(env->obst, sizeof(entry[0]));
105         entry->block = block;
106         entry->next = NULL;
107         entry->prev = NULL;
108         set_irn_link(block, entry);
109
110         if(block == startblock)
111                 return;
112
113         arity = get_irn_arity(block);
114         if(arity == 1) {
115                 edge.block = block;
116                 edge.pos = 0;
117                 edge.execfreq = get_block_execfreq(env->execfreqs, block);
118                 edge.highest_execfreq = 1;
119                 ARR_APP1(edge_t, env->edges, edge);
120         } else {
121                 int i;
122                 double highest_execfreq = -1;
123                 int highest_edge_num;
124
125                 edge.block = block;
126                 for(i = 0; i < arity; ++i) {
127                         double execfreq;
128
129                         ir_node *pred_block = get_Block_cfgpred_block(block, i);
130                         execfreq = get_block_execfreq(env->execfreqs, pred_block);
131
132                         edge.pos = i;
133                         edge.execfreq = execfreq;
134                         edge.highest_execfreq = 0;
135                         ARR_APP1(edge_t, env->edges, edge);
136                         if(execfreq > highest_execfreq) {
137                                 highest_execfreq = execfreq;
138                                 highest_edge_num = ARR_LEN(env->edges) - 1;
139                         }
140                 }
141
142                 env->edges[highest_edge_num].highest_execfreq = 1;
143         }
144 }
145
146 static int cmp_edges(const void *d1, const void *d2)
147 {
148         const edge_t *e1 = d1;
149         const edge_t *e2 = d2;
150         return e2->execfreq - e1->execfreq;
151 }
152
153 static void coalesce_blocks(blocksched_env_t *env)
154 {
155         int i;
156         int edge_count = ARR_LEN(env->edges);
157
158         // run1: only look at jumps
159         for(i = 0; i < edge_count; ++i) {
160                 const edge_t *edge = & env->edges[i];
161                 ir_node *block = edge->block;
162                 ir_node *pred_block;
163                 blocksched_entry_t *entry, *pred_entry;
164
165                 // the block might have been removed already...
166                 if(is_Bad(get_Block_cfgpred(block, 0)))
167                         continue;
168
169                 if(!edge->highest_execfreq)
170                         continue;
171
172                 pred_block = get_Block_cfgpred_block(block, edge->pos);
173                 entry = get_irn_link(block);
174                 pred_entry = get_irn_link(pred_block);
175
176                 if(pred_entry->next != NULL || entry->prev != NULL)
177                         continue;
178                 // only coalesce jumps
179                 if(get_block_succ_next(pred_block, get_block_succ_first(pred_block)) != NULL)
180                         continue;
181
182                 // schedule the 2 blocks behind each other
183                 ir_fprintf(stderr, "Coalesce (Jump) %+F -> %+F (%.3g)\n",
184                            pred_entry->block, entry->block, edge->execfreq);
185                 pred_entry->next = entry;
186                 entry->prev = pred_entry;
187         }
188
189         // run2: remaining edges
190         for(i = 0; i < edge_count; ++i) {
191                 const edge_t *edge = & env->edges[i];
192                 ir_node *block = edge->block;
193                 ir_node *pred_block;
194                 blocksched_entry_t *entry, *pred_entry;
195
196                 // the block might have been removed already...
197                 if(is_Bad(get_Block_cfgpred(block, 0)))
198                         continue;
199
200                 pred_block = get_Block_cfgpred_block(block, edge->pos);
201                 entry = get_irn_link(block);
202                 pred_entry = get_irn_link(pred_block);
203
204                 if(pred_entry->next != NULL || entry->prev != NULL)
205                         continue;
206
207                 // schedule the 2 blocks behind each other
208                 ir_fprintf(stderr, "Coalesce (CondJump) %+F -> %+F (%.3g)\n",
209                            pred_entry->block, entry->block, edge->execfreq);
210                 pred_entry->next = entry;
211                 entry->prev = pred_entry;
212         }
213 }
214
215 static void pick_block_successor(blocksched_entry_t *entry, blocksched_env_t *env)
216 {
217         ir_node *block = entry->block;
218         blocksched_entry_t *succ_entry;
219         const ir_edge_t *edge;
220         double best_succ_execfreq;
221         ir_node *succ = NULL;
222
223         if(irn_visited(block))
224                 return;
225         env->blockcount++;
226         mark_irn_visited(block);
227
228         ir_fprintf(stderr, "Pick succ of %+F\n", block);
229
230         // put all successors into the worklist
231         foreach_block_succ(block, edge) {
232                 ir_node *succ_block = get_edge_src_irn(edge);
233
234                 if(irn_visited(succ_block))
235                         continue;
236
237                 // we only need to put the first of a series of already connected
238                 // blocks into the worklist
239                 succ_entry = get_irn_link(succ_block);
240                 while(succ_entry->prev != NULL) {
241                         // break cycles...
242                         if(succ_entry->prev->block == succ_block) {
243                                 succ_entry->prev->next = NULL;
244                                 succ_entry->prev = NULL;
245                                 break;
246                         }
247                         succ_entry = succ_entry->prev;
248                 };
249
250                 if(irn_visited(succ_entry->block))
251                         continue;
252
253                 ir_fprintf(stderr, "Put %+F into worklist\n", succ_entry->block);
254                 pdeq_putr(env->worklist, succ_entry->block);
255         }
256
257         if(entry->next != NULL) {
258                 pick_block_successor(entry->next, env);
259                 return;
260         }
261
262         fprintf(stderr, "deciding...\n");
263         best_succ_execfreq = -1;
264         /* no successor yet: pick the successor block with the highest execution
265          * frequency which has no predecessor yet
266          */
267         foreach_block_succ(block, edge) {
268                 ir_node *succ_block = get_edge_src_irn(edge);
269
270                 if(irn_visited(succ_block))
271                         continue;
272
273                 succ_entry = get_irn_link(succ_block);
274                 if(succ_entry->prev != NULL)
275                         continue;
276
277                 double execfreq = get_block_execfreq(env->execfreqs, succ_block);
278                 if(execfreq > best_succ_execfreq) {
279                         best_succ_execfreq = execfreq;
280                         succ = succ_block;
281                 }
282         }
283
284         if(succ == NULL) {
285                 fprintf(stderr, "pick from worklist\n");
286
287                 do {
288                         if(pdeq_empty(env->worklist)) {
289                                 fprintf(stderr, "worklist empty\n");
290                                 return;
291                         }
292                         succ = pdeq_getl(env->worklist);
293                 } while(irn_visited(succ));
294         }
295
296         succ_entry = get_irn_link(succ);
297         entry->next = succ_entry;
298         succ_entry->prev = entry;
299
300         pick_block_successor(succ_entry, env);
301 }
302
303 static blocksched_entry_t *finish_block_schedule(blocksched_env_t *env)
304 {
305         ir_graph *irg = env->irg;
306         ir_node *startblock = get_irg_start_block(irg);
307         blocksched_entry_t *entry = get_irn_link(startblock);
308
309         inc_irg_visited(irg);
310
311         env->worklist = new_pdeq();
312         pick_block_successor(entry, env);
313         assert(pdeq_empty(env->worklist));
314         del_pdeq(env->worklist);
315
316         return entry;
317 }
318
319 static ir_node **create_blocksched_array(blocksched_entry_t *first, int count,
320                                          struct obstack* obst) {
321         int i = 0;
322         ir_node **block_list;
323         blocksched_entry_t *entry;
324
325         block_list = NEW_ARR_D(ir_node *, obst, count);
326         fprintf(stderr, "Blockschedule:\n");
327         for(entry = first; entry != NULL; entry = entry->next) {
328                 assert(i < count);
329                 block_list[i++] = entry->block;
330                 ir_fprintf(stderr, "\t%+F\n", entry->block);
331         }
332         assert(i == count);
333
334         return block_list;
335 }
336
337 static ir_node **create_block_schedule_greedy(ir_graph *irg, ir_exec_freq *execfreqs)
338 {
339         blocksched_env_t env;
340         struct obstack obst;
341         blocksched_entry_t *start_entry;
342         ir_node **block_list;
343
344         obstack_init(&obst);
345
346         env.irg = irg;
347         env.obst = &obst;
348         env.execfreqs = execfreqs;
349         env.edges = NEW_ARR_F(edge_t, 0);
350         env.worklist = NULL;
351         env.blockcount = 0;
352
353         // collect edge execution frequencies
354         irg_block_walk_graph(irg, collect_egde_frequency, NULL, &env);
355
356         // sort interblock edges by execution frequency
357         qsort(env.edges, ARR_LEN(env.edges), sizeof(env.edges[0]), cmp_edges);
358
359         be_remove_empty_blocks(irg);
360
361         if(algo != BLOCKSCHED_NAIV)
362                 coalesce_blocks(&env);
363
364         start_entry = finish_block_schedule(&env);
365
366         block_list = create_blocksched_array(start_entry, env.blockcount, get_irg_obstack(irg));
367
368         DEL_ARR_F(env.edges);
369         obstack_free(&obst, NULL);
370
371         return block_list;
372 }
373
374 /*
375  *  ___ _     ____
376  * |_ _| |   |  _ \
377  *  | || |   | |_) |
378  *  | || |___|  __/
379  * |___|_____|_|
380  *
381  */
382
383 #ifdef WITH_ILP
384 typedef struct _ilp_edge_t {
385         ir_node *block;
386         int pos;
387         int ilpvar;
388 } ilp_edge_t;
389
390 typedef struct _blocksched_ilp_env_t {
391         blocksched_env_t env;
392         ilp_edge_t *ilpedges;
393         lpp_t *lpp;
394 } blocksched_ilp_env_t;
395
396 typedef struct _blocksched_ilp_entry_t {
397         ir_node *block;
398         struct _blocksched_entry_t *next;
399         struct _blocksched_entry_t *prev;
400
401         int out_cst;
402 } blocksched_ilp_entry_t;
403
404 static int add_ilp_edge(ir_node *block, int pos, double execfreq, blocksched_ilp_env_t *env)
405 {
406         char name[64];
407         ilp_edge_t edge;
408         int edgeidx = ARR_LEN(env->ilpedges);
409
410         snprintf(name, sizeof(name), "edge%d", edgeidx);
411
412         edge.block = block;
413         edge.pos = pos;
414         edge.ilpvar = lpp_add_var_default(env->lpp, name, lpp_binary, execfreq, 1.0);
415
416         ARR_APP1(ilp_edge_t, env->ilpedges, edge);
417         return edgeidx;
418 }
419
420 static void collect_egde_frequency_ilp(ir_node *block, void *data)
421 {
422         blocksched_ilp_env_t *env = data;
423         ir_graph *irg = env->env.irg;
424         ir_node *startblock = get_irg_start_block(irg);
425         int arity;
426         blocksched_ilp_entry_t *entry;
427         lpp_cst_t cst;
428         char name[64];
429         int out_count;
430
431         snprintf(name, sizeof(name), "block_out_constr_%ld", get_irn_node_nr(block));
432         out_count = get_irn_n_edges_kind(block, EDGE_KIND_BLOCK);
433
434         entry = obstack_alloc(env->env.obst, sizeof(entry[0]));
435         entry->block = block;
436         entry->next = NULL;
437         entry->prev = NULL;
438         entry->out_cst = lpp_add_cst_uniq(env->lpp, name, lpp_greater, out_count - 1);
439         set_irn_link(block, entry);
440
441         if(block == startblock)
442                 return;
443
444         arity = get_irn_arity(block);
445         if(arity == 1) {
446                 double execfreq = get_block_execfreq(env->env.execfreqs, block);
447                 add_ilp_edge(block, 0, execfreq, env);
448         } else {
449                 int i;
450                 int *edgenums = alloca(sizeof(edgenums[0]) * arity);
451
452                 snprintf(name, sizeof(name), "block_in_constr_%ld", get_irn_node_nr(block));
453                 cst = lpp_add_cst_uniq(env->lpp, name, lpp_greater, arity - 1);
454
455                 for(i = 0; i < arity; ++i) {
456                         double execfreq;
457                         int edgenum;
458                         ilp_edge_t *edge;
459
460                         ir_node *pred_block = get_Block_cfgpred_block(block, i);
461                         execfreq = get_block_execfreq(env->env.execfreqs, pred_block);
462
463                         edgenum = add_ilp_edge(block, i, execfreq, env);
464                         edge = & env->ilpedges[edgenum];
465                         lpp_set_factor_fast(env->lpp, cst, edge->ilpvar, 1.0);
466                 }
467         }
468 }
469
470
471 static void coalesce_blocks_ilp(blocksched_ilp_env_t *env)
472 {
473         int i;
474         int edge_count = ARR_LEN(env->ilpedges);
475         FILE *f;
476         char fname[256];
477
478         /* complete out constraints */
479         for(i = 0; i < edge_count; ++i) {
480                 const ilp_edge_t *edge = & env->ilpedges[i];
481                 ir_node *block = edge->block;
482                 ir_node *pred;
483                 blocksched_ilp_entry_t *entry;
484
485                 // the block might have been removed already...
486                 if(is_Bad(get_Block_cfgpred(block, 0)))
487                         continue;
488
489                 pred = get_Block_cfgpred_block(block, edge->pos);
490                 entry = get_irn_link(pred);
491
492                 ir_printf("Adding out cst to %+F from %+F,%d\n",
493                                   pred, block, edge->pos);
494                 lpp_set_factor_fast(env->lpp, entry->out_cst, edge->ilpvar, 1.0);
495         }
496
497         lpp_dump(env->lpp, "lpp.out");
498         snprintf(fname, sizeof(fname), "lpp_%s.plain", get_irg_dump_name(env->env.irg));
499         f = fopen(fname, "w");
500         lpp_dump_plain(env->lpp, f);
501         fclose(f);
502         //lpp_solve_net(env->lpp, main_env->options->ilp_server, main_env->options->ilp_solver);
503         lpp_solve_net(env->lpp, "i44pc52", "cplex");
504         assert(lpp_is_sol_valid(env->lpp));
505
506         /* Apply results to edges */
507         for(i = 0; i < edge_count; ++i) {
508                 const ilp_edge_t *edge = & env->ilpedges[i];
509                 ir_node *block = edge->block;
510                 ir_node *pred;
511                 int is_jump;
512                 blocksched_entry_t *entry;
513                 blocksched_entry_t *pred_entry;
514
515                 // the block might have been removed already...
516                 if(is_Bad(get_Block_cfgpred(block, 0)))
517                         continue;
518
519                 is_jump = lpp_get_var_sol(env->lpp, edge->ilpvar);
520                 if(is_jump)
521                         continue;
522
523                 pred = get_Block_cfgpred_block(block, edge->pos);
524                 entry = get_irn_link(block);
525                 pred_entry = get_irn_link(pred);
526
527                 assert(entry->prev == NULL && pred_entry->next == NULL);
528                 entry->prev = pred_entry;
529                 pred_entry->next = entry;
530         }
531 }
532
533 static ir_node **create_block_schedule_ilp(ir_graph *irg, ir_exec_freq *execfreqs)
534 {
535         blocksched_ilp_env_t env;
536         struct obstack obst;
537         blocksched_entry_t *start_entry;
538         ir_node **block_list;
539
540         obstack_init(&obst);
541
542         env.env.irg = irg;
543         env.env.obst = &obst;
544         env.env.execfreqs = execfreqs;
545         env.env.worklist = NULL;
546         env.env.blockcount = 0;
547         env.ilpedges = NEW_ARR_F(ilp_edge_t, 0);
548
549         env.lpp = new_lpp("blockschedule", lpp_minimize);
550         lpp_set_time_limit(env.lpp, 20);
551         lpp_set_log(env.lpp, stdout);
552
553         irg_block_walk_graph(irg, collect_egde_frequency_ilp, NULL, &env);
554
555         be_remove_empty_blocks(irg);
556
557         coalesce_blocks_ilp(&env);
558
559         start_entry = finish_block_schedule(&env.env);
560
561         block_list = create_blocksched_array(start_entry, env.env.blockcount, get_irg_obstack(irg));
562
563         DEL_ARR_F(env.ilpedges);
564         free_lpp(env.lpp);
565         obstack_free(&obst, NULL);
566
567         return block_list;
568 }
569 #endif
570
571 /*
572  *  _____      _   ____  ____
573  * | ____|_  _| |_| __ )| __ )
574  * |  _| \ \/ / __|  _ \|  _ \
575  * | |___ >  <| |_| |_) | |_) |
576  * |_____/_/\_\\__|____/|____/
577  *
578  */
579
580 /** A simple forward single linked list. */
581 typedef struct {
582         ir_node *start;   /**< start of the list */
583         ir_node *end;     /**< last block in the list */
584         unsigned n_blks;  /**< number of blocks in the list */
585 } anchor;
586
587 static void add_block(anchor *list, ir_node *block) {
588         if(list->start == NULL) {
589                 list->start = block;
590                 list->end = block;
591         } else {
592                 set_irn_link(list->end, block);
593                 list->end = block;
594         }
595
596         list->n_blks++;
597 }
598
599 static void create_block_list(ir_node *leader_block, anchor *list) {
600         int i;
601         ir_node *block = NULL;
602         const ir_edge_t *edge;
603
604         ir_extblk *extbb = get_Block_extbb(leader_block);
605         if(extbb_visited(extbb))
606                 return;
607         mark_extbb_visited(extbb);
608
609         for(i = 0; i < get_extbb_n_blocks(extbb); ++i) {
610                 block = get_extbb_block(extbb, i);
611                 add_block(list, block);
612         }
613
614         assert(block != NULL);
615
616         // pick successor extbbs
617         foreach_block_succ(block, edge) {
618                 ir_node *succ = get_edge_src_irn(edge);
619
620                 create_block_list(succ, list);
621         }
622
623         for(i = 0; i < get_extbb_n_blocks(extbb) - 1; ++i) {
624                 block = get_extbb_block(extbb, i);
625                 foreach_block_succ(block, edge) {
626                         ir_node *succ = get_edge_src_irn(edge);
627
628                         create_block_list(succ, list);
629                 }
630         }
631 }
632
633 void compute_extbb_execfreqs(ir_graph *irg, ir_exec_freq *execfreqs);
634
635 /*
636  * Calculates a block schedule. The schedule is stored as a linked
637  * list starting at the start_block of the irg.
638  */
639 static ir_node **create_extbb_block_schedule(ir_graph *irg, ir_exec_freq *execfreqs)
640 {
641         anchor list;
642         ir_node **blk_list, *b, *n;
643         unsigned i;
644
645         /* schedule extended basic blocks */
646         compute_extbb_execfreqs(irg, execfreqs);
647         //compute_extbb(irg);
648
649         list.start  = NULL;
650         list.end    = NULL;
651         list.n_blks = 0;
652         inc_irg_block_visited(irg);
653         create_block_list(get_irg_start_block(irg), &list);
654
655         /** create an array, so we can go forward and backward */
656         blk_list = NEW_ARR_D(ir_node *, irg->obst,list.n_blks);
657
658         for (i = 0, b = list.start; b; b = n, ++i) {
659                 n = get_irn_link(b);
660                 blk_list[i] = b;
661         }
662
663         return blk_list;
664 }
665
666 /*
667  *  __  __       _
668  * |  \/  | __ _(_)_ __
669  * | |\/| |/ _` | | '_ \
670  * | |  | | (_| | | | | |
671  * |_|  |_|\__,_|_|_| |_|
672  *
673  */
674
675 #ifdef WITH_LIBCORE
676 void be_block_schedule_register_options(lc_opt_entry_t *grp)
677 {
678         static int run_once = 0;
679         lc_opt_entry_t *blocksched_grp;
680
681         if(run_once)
682                 return;
683         run_once = 1;
684         blocksched_grp = lc_opt_get_grp(grp, "blocksched");
685
686         lc_opt_add_table(blocksched_grp, be_blocksched_options);
687 }
688 #endif
689
690 ir_node **be_create_block_schedule(ir_graph *irg, ir_exec_freq *execfreqs)
691 {
692         switch(algo) {
693         case BLOCKSCHED_GREEDY:
694         case BLOCKSCHED_NAIV:
695                 return create_block_schedule_greedy(irg, execfreqs);
696         case BLOCKSCHED_EXTBB:
697                 return create_extbb_block_schedule(irg, execfreqs);
698 #ifdef WITH_ILP
699         case BLOCKSCHED_ILP:
700                 return create_block_schedule_ilp(irg, execfreqs);
701 #endif
702         }
703
704         assert(0 && "unknown blocksched algo");
705         return NULL;
706 }