2f776b046021322ed4145cfb307600cea8cb41e7
[libfirm] / ir / be / beilpsched.c
1 /**
2  * Scheduling algorithms.
3  * An ILP scheduler based on
4  * "ILP-based Instruction Scheduling for IA-64"
5  * by Daniel Kaestner and Sebastian Winkel
6  *
7  * @date   22.10.2005
8  * @author Christian Wuerdig
9  * @cvs-id $Id$
10  */
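/*
 * Sketch of the ILP model built per block in create_ilp() below (a summary of
 * what this file emits, not a restatement of the cited paper):
 *
 *   - binary variables x_{nt}^k: node n starts at time step t on execution
 *     unit type k, for each t in [ASAP(n), ALAP(n)] and each allowed type k
 *   - objective: minimize the sum of (t + 1) * x_{nt}^k, i.e. prefer early starts
 *   - constraints: assignment (every node is scheduled exactly once),
 *     precedence (operands before their users), resources (per unit type and
 *     time step) and bundle width (per time step); see the constraint
 *     sections in create_ilp().
 */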
11 #ifdef HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14
15 #ifdef WITH_ILP
16
17 #include <math.h>
18
19 #ifndef _WIN32
20 #include <strings.h>
21 #endif /* _WIN32 */
22
23 #include "irnode_t.h"
24 #include "irgwalk.h"
25 #include "irbitset.h"
26 #include "irphase_t.h"
27 #include "iredges.h"
28 #include "debug.h"
29 #include "pdeq.h"
30 #include "irtools.h"
31 #include "irdump.h"
32
33 #include <lpp/lpp.h>
34 #include <lpp/lpp_net.h>
35
36 #ifdef WITH_LIBCORE
37 #include <libcore/lc_opts.h>
38 #include <libcore/lc_opts_enum.h>
39 #include <libcore/lc_timing.h>
40 #endif /* WITH_LIBCORE */
41
42 #include "be.h"
43 #include "benode_t.h"
44 #include "besched_t.h"
45
46 typedef struct _ilpsched_options_t {
47         unsigned time_limit;
48         char     log_file[1024];
49 } ilpsched_options_t;
50
51 typedef struct _unit_type_info_t {
52         int                            n_units;
53         const be_execution_unit_type_t *tp;
54 } unit_type_info_t;
55
56 /* attributes for a node */
57 typedef struct _ilpsched_node_attr_t {
58         unsigned asap;                     /**< The ASAP scheduling control step */
59         unsigned alap;                     /**< The ALAP scheduling control step */
60         unsigned sched_point;              /**< the step in which the node is finally scheduled */
61         unsigned visit_idx;                /**< Index of the node having visited this node last */
62         unsigned block_idx : 31;           /**< A unique per block index */
63         unsigned enqueued  : 1;            /**< Node is already enqueued for ALAP calculation */
64         bitset_t *transitive_block_nodes;  /**< Set of transitive block nodes (predecessors
65                                                 for ASAP, successors for ALAP) */
66         unsigned n_unit_types;             /**< number of allowed execution unit types */
67         unit_type_info_t *type_info;       /**< list of allowed execution unit types */
68         int      *ilp_vars;                /**< the binary ILP variables x_{nt}^k assigned to this node
69                                                 (== 1 iff node n is executed at step t on a unit of type k).
70                                                 So there are |ASAP(n) .. ALAP(n)| * |allowed unit types| variables per node.
71                                            */
72 } ilpsched_node_attr_t;
73
74 /* attributes for a block */
75 typedef struct _ilpsched_block_attr_t {
76         unsigned block_last_idx;        /**< The highest node index in block so far */
77         unsigned n_interesting_nodes;   /**< The number of nodes interesting for scheduling */
78         waitq    *root_nodes;           /**< A queue of nodes having no user in current block */
79         ir_node  *head_ilp_nodes;       /**< A linked list of nodes which will contribute to ILP */
80 } ilpsched_block_attr_t;
81
82 typedef union _ilpsched_attr_ {
83         ilpsched_node_attr_t  node_attr;
84         ilpsched_block_attr_t block_attr;
85 } ilpsched_attr_t;
86
87 /* An irn for the phase and its attributes (either node or block) */
88 typedef struct {
89         ir_node         *irn;
90         ilpsched_attr_t attr;
91 } be_ilpsched_irn_t;
92
93 /* The ILP scheduling environment */
94 typedef struct {
95         phase_t             ph;            /**< The phase */
96         ir_graph            *irg;          /**< The current irg */
97         waitq               *alap_queue;   /**< A queue of nodes waiting for final ALAP calculation */
98         const arch_env_t    *arch_env;
99         const be_main_env_t *main_env;
100         const be_machine_t  *cpu;          /**< the current abstract machine */
101         ilpsched_options_t  *opts;         /**< the ilp options for current irg */
102         DEBUG_ONLY(firm_dbg_module_t *dbg);
103 } be_ilpsched_env_t;
104
105 /* convenience macros to handle phase irn data */
106 #define get_ilpsched_irn(ilpsched_env, irn) (phase_get_or_set_irn_data(&(ilpsched_env)->ph, (irn)))
107 #define is_ilpsched_block(node)             (is_Block((node)->irn))
108 #define get_ilpsched_block_attr(block)      (&(block)->attr.block_attr)
109 #define get_ilpsched_node_attr(node)        (&(node)->attr.node_attr)
110
111 /* iterate over a list of ir_nodes linked by link field */
112 #define foreach_linked_irns(head, iter) for ((iter) = (head); (iter); (iter) = get_irn_link((iter)))
113
114 /* check if node is considered for ILP scheduling */
115 #define consider_for_sched(irn) \
116         (! (is_Block(irn)   ||      \
117                 is_Proj(irn)    ||      \
118                 is_Phi(irn)     ||      \
119                 be_is_Keep(irn) ||      \
120                 is_NoMem(irn)   ||      \
121                 is_Jmp(irn)     ||      \
122                 is_End(irn)             \
123                 ))
124
125 /* gives the valid scheduling time step interval for a node */
126 #define VALID_SCHED_INTERVAL(na) ((na)->alap - (na)->asap + 1)
127
128 /* gives the corresponding ILP variable for given node, unit and time step */
129 #define ILPVAR_IDX(na, unit, control_step) \
130         ((unit) * VALID_SCHED_INTERVAL((na)) + (control_step) - (na)->asap + 1)
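/*
 * Worked example (illustration only): for a node with ASAP = 2 and ALAP = 4 the
 * valid interval covers 3 steps; with 2 allowed unit types it gets 2 * 3 = 6 ILP
 * variables, laid out unit-type-major. Time steps are handled 0-based in the
 * code (asap - 1 ... alap - 1), so for unit type 1 and control step 2 (the
 * second step of the interval):
 *     ILPVAR_IDX(na, 1, 2) = 1 * 3 + 2 - 2 + 1 = 4,
 * i.e. the fifth entry of ilp_vars[].
 */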
131
132 /* check if a double value is within an epsilon environment of 0 */
133 #define LPP_VALUE_IS_0(dbl) (fabs((dbl)) <= 1e-10)
134
135 /* option variable */
136 static ilpsched_options_t ilp_opts = {
137         120,   /* 120 sec per block time limit */
138         ""     /* no log file */
139 };
140
141 #ifdef WITH_LIBCORE
142 /* ILP options */
143 static const lc_opt_table_entry_t ilpsched_option_table[] = {
144         LC_OPT_ENT_INT("time_limit", "ILP time limit per block", &ilp_opts.time_limit),
145         LC_OPT_ENT_STR("lpp_log",    "LPP logfile (stderr and stdout are supported)", ilp_opts.log_file, sizeof(ilp_opts.log_file)),
146         { NULL }
147 };
148 #endif /* WITH_LIBCORE */
149
150 /**
151  * Compare scheduling time steps of two be_ilpsched_irn's.
152  */
153 static int cmp_ilpsched_irn(const void *a, const void *b) {
154         be_ilpsched_irn_t    *n1   = *(be_ilpsched_irn_t **)a;
155         be_ilpsched_irn_t    *n2   = *(be_ilpsched_irn_t **)b;
156         ilpsched_node_attr_t *n1_a = get_ilpsched_node_attr(n1);
157         ilpsched_node_attr_t *n2_a = get_ilpsched_node_attr(n2);
158
159         return QSORT_CMP(n1_a->sched_point, n2_a->sched_point);
160 }
161
162 /**
163  * In case there is no phase information for irn, initialize it.
164  */
165 static void *init_ilpsched_irn(phase_t *ph, ir_node *irn, void *old) {
166         be_ilpsched_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
167
168         if (res == old) {
169                 /* if we already have some data: check for reinitialization */
170
171                 if (! is_Block(irn)) {
172                         ilpsched_node_attr_t *na = get_ilpsched_node_attr(res);
173
174                         if (! na->transitive_block_nodes) {
175                                 ir_node               *block      = get_nodes_block(irn);
176                                 be_ilpsched_irn_t     *block_node = phase_get_or_set_irn_data(ph, block);
177                                 ilpsched_block_attr_t *ba         = get_ilpsched_block_attr(block_node);
178
179                                 /* we are called after the block indices have been built: create the bitset */
180                                 na->transitive_block_nodes = bitset_obstack_alloc(phase_obst(ph), ba->block_last_idx);
181                         }
182                         else {
183                                 /* we are called from reinit block data: clear the bitset */
184                                 bitset_clear_all(na->transitive_block_nodes);
185                                 na->visit_idx = 0;
186                         }
187                 }
188                 return old;
189         }
190
191         res->irn = irn;
192
193         /* set ilpsched irn attributes (either block or irn) */
194         if (is_Block(irn)) {
195                 ilpsched_block_attr_t *ba = get_ilpsched_block_attr(res);
196
197                 ba->n_interesting_nodes = 0;
198                 ba->block_last_idx      = 0;
199                 ba->root_nodes          = new_waitq();
200                 ba->head_ilp_nodes      = NULL;
201         }
202         else {
203                 ilpsched_node_attr_t *na = get_ilpsched_node_attr(res);
204                 memset(na, 0, sizeof(*na));
205         }
206
207         return res;
208 }
209
210 /**
211  * Assign a per block unique number to each node.
212  */
213 static void build_block_idx(ir_node *irn, void *walk_env) {
214         be_ilpsched_env_t     *env = walk_env;
215         be_ilpsched_irn_t     *node, *block_node;
216         ilpsched_node_attr_t  *na;
217         ilpsched_block_attr_t *ba;
218
219         if (! consider_for_sched(irn))
220                 return;
221
222         node       = get_ilpsched_irn(env, irn);
223         na         = get_ilpsched_node_attr(node);
224         block_node = get_ilpsched_irn(env, get_nodes_block(irn));
225         ba         = get_ilpsched_block_attr(block_node);
226
227         na->block_idx = ba->block_last_idx++;
228 }
229
230 /********************************************************
231  *                              __        _
232  *                             / /       | |
233  *   __ _ ___  __ _ _ __      / /    __ _| | __ _ _ __
234  *  / _` / __|/ _` | '_ \    / /    / _` | |/ _` | '_ \
235  * | (_| \__ \ (_| | |_) |  / /    | (_| | | (_| | |_) |
236  *  \__,_|___/\__,_| .__/  /_/      \__,_|_|\__,_| .__/
237  *                 | |                           | |
238  *                 |_|                           |_|
239  ********************************************************/
240
241 /**
242  * Add all nodes having no user in the current block to the block's root_nodes queue.
243  */
244 static void collect_alap_root_nodes(ir_node *irn, void *walk_env) {
245         ir_node               *block;
246         const ir_edge_t       *edge;
247         be_ilpsched_irn_t     *block_node;
248         ilpsched_block_attr_t *ba;
249         be_ilpsched_env_t     *env           = walk_env;
250         int                   has_block_user = 0;
251         ir_edge_kind_t        ekind[2]       = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
252         int                   i, j;
253
254         if (! consider_for_sched(irn))
255                 return;
256
257         block = get_nodes_block(irn);
258
259         DBG((env->dbg, LEVEL_3, "%+F (%+F) is interesting, examining ... ", irn, block));
260
261         for (i = 0; i < 2; ++i) {
262                 foreach_out_edge_kind(irn, edge, ekind[i]) {
263                         ir_node *user = get_edge_src_irn(edge);
264
265                         if (is_Proj(user)) {
266                                 const ir_edge_t *user_edge;
267
268                                 if (get_irn_mode(user) == mode_X)
269                                         continue;
270
271                                 /* The ABI ensures that there will be no ProjT nodes in the graph. */
272                                 for (j = 0; j < 2; ++j) {
273                                         foreach_out_edge_kind(user, user_edge, ekind[j]) {
274                                                 ir_node *real_user = get_edge_src_irn(user_edge);
275
276                                                 if (! is_Phi(real_user) && ! be_is_Keep(real_user) && get_nodes_block(real_user) == block) {
277                                                         has_block_user = 1;
278                                                         break;
279                                                 }
280                                         }
281                                 }
282
283                                 if (has_block_user)
284                                         break;
285                         }
286                         else if (is_Block(user)) {
287                                 continue;
288                         }
289                         else if (! is_Phi(user) && ! be_is_Keep(user) && get_nodes_block(user) == block) {
290                                 has_block_user = 1;
291                                 break;
292                         }
293                 }
294         }
295
296         block_node = get_ilpsched_irn(env, block);
297         ba         = get_ilpsched_block_attr(block_node);
298
299         ba->n_interesting_nodes++;
300
301         /* current irn has no user inside this block, add to queue */
302         if (! has_block_user) {
303                 DB((env->dbg, LEVEL_3, "root node\n"));
304                 waitq_put(ba->root_nodes, irn);
305         }
306         else {
307                 DB((env->dbg, LEVEL_3, "normal node\n"));
308         }
309 }
310
311 /**
312  * Calculate the ASAP scheduling step for current irn.
313  */
314 static void calculate_irn_asap(ir_node *irn, void *walk_env) {
315         be_ilpsched_irn_t *node;
316         be_ilpsched_env_t *env = walk_env;
317         int      i;
318         ir_node  *block;
319         ilpsched_node_attr_t *na;
320
321         /* These nodes are handled separately */
322         if (! consider_for_sched(irn))
323                 return;
324
325         DBG((env->dbg, LEVEL_2, "Calculating ASAP of node %+F\n", irn));
326
327         node  = get_ilpsched_irn(env, irn);
328         block = get_nodes_block(irn);
329         na    = get_ilpsched_node_attr(node);
330
331         /* accumulate all transitive predecessors of current node */
332         for (i = get_irn_ins_or_deps(irn) - 1; i >= 0; --i) {
333                 ir_node              *pred = skip_Proj(get_irn_in_or_dep(irn, i));
334                 be_ilpsched_irn_t    *pred_node;
335                 ilpsched_node_attr_t *pna;
336                 unsigned             idx;
337
338                 if (be_is_Keep(pred))
339                         pred = skip_Proj(get_irn_n(pred, 0));
340
341                 if (is_Phi(pred) || block != get_nodes_block(pred) || is_NoMem(pred))
342                         continue;
343
344                 pred_node = get_ilpsched_irn(env, pred);
345                 pna       = get_ilpsched_node_attr(pred_node);
346                 idx       = get_irn_idx(irn);
347
348                 assert(pna->asap && "missing ASAP of predecessor");
349
350                 /*
351                         We have not visited this predecessor yet
352                         -> accumulate its predecessors
353                 */
354                 if (pna->visit_idx != idx) {
355                         pna->visit_idx = idx;
356                         na->transitive_block_nodes = bitset_or(na->transitive_block_nodes, pna->transitive_block_nodes);
357                         DBG((env->dbg, LEVEL_3, "\taccumulating preds of %+F\n", pred));
358                 }
359         }
360
361         /* every node is its own transitive predecessor in its block */
362         bitset_set(na->transitive_block_nodes, na->block_idx);
363
364         /* asap = number of transitive predecessors in this block */
365         na->asap = bitset_popcnt(na->transitive_block_nodes);
366
367         DBG((env->dbg, LEVEL_2, "\tcalculated ASAP is %u\n", na->asap));
368 }
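/*
 * Illustration: ASAP is the number of transitive intra-block predecessors,
 * counting the node itself. For a dependence chain a -> b -> c inside one
 * block this yields ASAP(a) = 1, ASAP(b) = 2, ASAP(c) = 3, i.e. the earliest
 * control step each node may occupy.
 */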
369
370 /**
371  * Accumulate the successors of all nodes from irn on upwards.
372  */
373 static void accumulate_succs(be_ilpsched_env_t *env, ir_node *irn) {
374         unsigned             i, n;
375         be_ilpsched_irn_t    *node  = get_ilpsched_irn(env, irn);
376         ilpsched_node_attr_t *na    = get_ilpsched_node_attr(node);
377         ir_node              *block = get_nodes_block(irn);
378         waitq                *wq    = new_waitq();
379
380         DBG((env->dbg, LEVEL_3, "\taccumulating succs of %+F\n", irn));
381
382         /* enqueue node for final alap calculation */
383         if (! na->enqueued) {
384                 be_ilpsched_irn_t     *block_node = get_ilpsched_irn(env, block);
385                 ilpsched_block_attr_t *ba         = get_ilpsched_block_attr(block_node);
386
387                 na->enqueued = 1;
388                 na->alap     = ba->n_interesting_nodes;
389                 waitq_put(env->alap_queue, node);
390
391                 set_irn_link(irn, ba->head_ilp_nodes);
392                 ba->head_ilp_nodes = irn;
393                 DBG((env->dbg, LEVEL_5, "\t\tlinked %+F to ilp nodes of %+F, attr %p\n", irn, block, ba));
394                 DBG((env->dbg, LEVEL_4, "\t\tenqueueing %+F for final ALAP calculation\n", irn));
395         }
396
397         for (i = 0, n = get_irn_ins_or_deps(irn); i < n; ++i) {
398                 ir_node              *pred = skip_Proj(get_irn_in_or_dep(irn, i));
399                 unsigned             idx;
400                 be_ilpsched_irn_t    *pred_node;
401                 ilpsched_node_attr_t *pna;
402
403                 if (be_is_Keep(pred))
404                         pred = skip_Proj(get_irn_n(pred, 0));
405
406                 if (is_Phi(pred) || block != get_nodes_block(pred) || is_NoMem(pred))
407                         continue;
408
409                 pred_node = get_ilpsched_irn(env, pred);
410                 pna       = get_ilpsched_node_attr(pred_node);
411                 idx       = get_irn_idx(irn);
412
413                 /* accumulate the successors */
414                 if (pna->visit_idx != idx) {
415                         pna->visit_idx = idx;
416                         pna->transitive_block_nodes = bitset_or(pna->transitive_block_nodes, na->transitive_block_nodes);
417
418                         /* set current node as successor */
419                         bitset_set(pna->transitive_block_nodes, na->block_idx);
420                         waitq_put(wq, pred);
421
422                         DBG((env->dbg, LEVEL_3, "\taccumulating succs of %+F to %+F\n", irn, pred));
423                 }
424         }
425
426         /* process all predecessors */
427         while (! waitq_empty(wq)) {
428                 accumulate_succs(env, waitq_get(wq));
429         }
430
431         del_waitq(wq);
432 }
433
434 /**
435  * Calculate the ALAP scheduling step of all irns in current block.
436  * Depends on ASAP being calculated.
437  */
438 static void calculate_block_alap(ir_node *block, void *walk_env) {
439         be_ilpsched_env_t     *env        = walk_env;
440         be_ilpsched_irn_t     *block_node = get_ilpsched_irn(env, block);
441         ilpsched_block_attr_t *ba         = get_ilpsched_block_attr(block_node);
442
443         assert(is_Block(block));
444
445         DBG((env->dbg, LEVEL_2, "Calculating ALAP for nodes in %+F (%u nodes)\n", block, ba->n_interesting_nodes));
446
447         /* TODO: Might be faster to use out edges and call phase_reinit_single_irn_data */
448         phase_reinit_block_irn_data(&env->ph, block);
449
450         /* calculate the alap of all nodes, starting at collected roots upwards */
451         while (! waitq_empty(ba->root_nodes)) {
452                 accumulate_succs(env, waitq_get(ba->root_nodes));
453         }
454
455         /* we don't need it anymore */
456         del_waitq(ba->root_nodes);
457         ba->root_nodes = NULL;
458
459         /* all interesting nodes should have their successors accumulated now */
460         while (! waitq_empty(env->alap_queue)) {
461                 be_ilpsched_irn_t    *node = waitq_get(env->alap_queue);
462                 ilpsched_node_attr_t *na   = get_ilpsched_node_attr(node);
463
464                 /* control flow ops must always be scheduled last */
465                 if (is_cfop(node->irn) && ! is_Start(node->irn) && get_irn_opcode(node->irn) != iro_End)
466                         na->asap = na->alap;
467                 else
468                         na->alap -= bitset_popcnt(na->transitive_block_nodes);
469                 DBG((env->dbg, LEVEL_2, "\tALAP of %+F is %u (%u succs)\n", node->irn, na->alap, bitset_popcnt(na->transitive_block_nodes)));
470         }
471 }
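/*
 * Illustration: ALAP starts at n_interesting_nodes and is reduced by the
 * number of transitive intra-block successors. For the chain a -> b -> c
 * (3 interesting nodes) this gives ALAP(c) = 3, ALAP(b) = 2, ALAP(a) = 1, so
 * together with the ASAP values above every node of the chain ends up with a
 * one-step scheduling interval.
 */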
472
473
474 /*************************
475  *   _____ _      _____
476  *  |_   _| |    |  __ \
477  *    | | | |    | |__) |
478  *    | | | |    |  ___/
479  *   _| |_| |____| |
480  *  |_____|______|_|
481  *
482  *************************/
483
484 /**
485  * Check if node can be executed on given unit type.
486  */
487 static INLINE int is_valid_unit_type_for_node(const be_execution_unit_type_t *tp, be_ilpsched_irn_t *node) {
488         int                  i;
489         ilpsched_node_attr_t *na = get_ilpsched_node_attr(node);
490
491         for (i = na->n_unit_types - 1; i >= 0; --i) {
492                 if (na->type_info[i].tp == tp)
493                         return i;
494         }
495
496         return -1;
497 }
498
499 static INLINE void check_for_keeps(waitq *keeps, ir_node *block, ir_node *irn) {
500         const ir_edge_t *edge;
501
502         foreach_out_edge(irn, edge) {
503                 ir_node *user = get_edge_src_irn(edge);
504
505                 if (be_is_Keep(user)) {
506                         assert(get_nodes_block(user) == block && "Keep must not be in different block.");
507                         waitq_put(keeps, user);
508                 }
509         }
510 }
511
512 /**
513  * Adds a node, its Projs (in case of mode_T nodes) and
514  * its Keeps to the schedule.
515  */
516 static void add_to_sched(ir_node *block, ir_node *irn) {
517         const ir_edge_t *edge;
518         waitq           *keeps;
519
520         if (get_irn_mode(irn) == mode_M)
521                 return;
522         keeps = new_waitq();
523         sched_add_before(block, irn);
524
525         /* add Projs */
526         if (get_irn_mode(irn) == mode_T) {
527                 foreach_out_edge(irn, edge) {
528                         ir_node *user = get_edge_src_irn(edge);
529
530                         assert(is_Proj(user) && "User of mode_T node must be a Proj");
531
532                         if (to_appear_in_schedule(user))
533                                 sched_add_before(block, user);
534
535                         check_for_keeps(keeps, block, user);
536                 }
537         }
538         else {
539                 check_for_keeps(keeps, block, irn);
540         }
541
542         /* add Keeps */
543         while (! waitq_empty(keeps)) {
544                 ir_node *keep = waitq_get(keeps);
545                 if (! sched_is_scheduled(keep))
546                         sched_add_before(block, keep);
547         }
548
549         del_waitq(keeps);
550 }
551
552 /**
553  * Schedule all nodes in the given block, according to the ILP solution.
554  */
555 static void apply_solution(be_ilpsched_env_t *env, lpp_t *lpp, ir_node *block) {
556         be_ilpsched_irn_t     *block_node = get_ilpsched_irn(env, block);
557         ilpsched_block_attr_t *ba         = get_ilpsched_block_attr(block_node);
558         sched_info_t          *info       = get_irn_sched_info(block);
559         be_ilpsched_irn_t     **sched_nodes;
560         unsigned              i, l;
561         ir_node               *cfop, *irn;
562         const ir_edge_t       *edge;
563
564         /* init block schedule list */
565         INIT_LIST_HEAD(&info->list);
566         info->scheduled = 1;
567
568         /* collect nodes and their scheduling time step */
569         sched_nodes = NEW_ARR_F(be_ilpsched_irn_t *, 0);
570         if (ba->n_interesting_nodes == 0) {
571                 /* ignore */
572         }
573         else if (ba->n_interesting_nodes == 1) {
574                 be_ilpsched_irn_t *node = get_ilpsched_irn(env, ba->head_ilp_nodes);
575
576                 /* add the single node */
577                 ARR_APP1(be_ilpsched_irn_t *, sched_nodes, node);
578         }
579         else {
580                 foreach_linked_irns(ba->head_ilp_nodes, irn) {
581                         be_ilpsched_irn_t    *node;
582                         ilpsched_node_attr_t *na;
583                         int                  tp_idx, found;
584                         unsigned             cur_var, t;
585
586                         node    = get_ilpsched_irn(env, irn);
587                         na      = get_ilpsched_node_attr(node);
588                         cur_var = 0;
589                         found   = 0;
590
591                         for (tp_idx = na->n_unit_types - 1; ! found && tp_idx >= 0; --tp_idx) {
592                                 for (t = na->asap - 1; ! found && t <= na->alap - 1; ++t) {
593                                         double val = lpp_get_var_sol(lpp, na->ilp_vars[cur_var++]);
594                                         if (! LPP_VALUE_IS_0(val)) {
595                                                 na->sched_point = t;
596                                                 ARR_APP1(be_ilpsched_irn_t *, sched_nodes, node);
597                                                 DBG((env->dbg, LEVEL_1, "Schedpoint of %+F is %u at unit type %s\n",
598                                                         irn, t, na->type_info[tp_idx].tp->name));
599                                                 found = 1;
600                                         }
601                                 }
602                         }
603                 }
604
605                 /* sort nodes ascending by scheduling time step */
606                 qsort(sched_nodes, ARR_LEN(sched_nodes), sizeof(sched_nodes[0]), cmp_ilpsched_irn);
607         }
608
609         /* make all Phis ready and remember the single cf op */
610         cfop = NULL;
611         foreach_out_edge(block, edge) {
612                 irn = get_edge_src_irn(edge);
613
614                 switch (get_irn_opcode(irn)) {
615                         case iro_Phi:
616                                 add_to_sched(block, irn);
617                                 break;
618                         case iro_Start:
619                         case iro_End:
620                         case iro_Proj:
621                         case iro_Bad:
622                                 break;
623                         default:
624                                 if (is_cfop(irn)) {
625                                         assert(cfop == NULL && "Highlander!");
626                                         cfop = irn;
627                                 }
628                         break;
629                 }
630         }
631
632         /* add all nodes from list */
633         for (i = 0, l = ARR_LEN(sched_nodes); i < l; ++i) {
634                 add_to_sched(block, sched_nodes[i]->irn);
635         }
636
637         /* schedule control flow node if not already done */
638         if (cfop && ! sched_is_scheduled(cfop))
639                 add_to_sched(block, cfop);
640
641         DEL_ARR_F(sched_nodes);
642 }
643
644 /**
645  * Create the ilp (add variables, build constraints, solve, build schedule from solution).
646  */
647 static void create_ilp(ir_node *block, void *walk_env) {
648         be_ilpsched_env_t     *env           = walk_env;
649         be_ilpsched_irn_t     *block_node    = get_ilpsched_irn(env, block);
650         ilpsched_block_attr_t *ba            = get_ilpsched_block_attr(block_node);
651         unsigned              num_block_var  = 0;
652         unsigned              num_nodes      = 0;
653         unsigned              n_instr_max    = env->cpu->bundle_size * env->cpu->bundels_per_cycle;
654         bitset_t              *bs_block_irns = bitset_alloca(ba->block_last_idx);
655         FILE                  *logfile       = NULL;
656         lpp_t                 *lpp           = NULL;
657         unsigned              num_cst_assign, num_cst_prec, num_cst_resrc, num_cst_bundle;
658         unsigned              t;
659         int                   glob_type_idx;
660         ir_node               *irn;
661         char                  buf[1024];
662         struct obstack        var_obst;
663         lc_timer_t            *t_var;
664         lc_timer_t            *t_cst_assign;
665         lc_timer_t            *t_cst_prec;
666         lc_timer_t            *t_cst_rsrc;
667         lc_timer_t            *t_cst_bundle;
668         double                fact_var, fact_cst;
669
670 #ifdef WITH_LIBCORE
671         t_var        = lc_timer_register("beilpsched_var",        "create ilp variables");
672         t_cst_assign = lc_timer_register("beilpsched_cst_assign", "create assignment constraints");
673         t_cst_prec   = lc_timer_register("beilpsched_cst_prec",   "create precedence constraints");
674         t_cst_rsrc   = lc_timer_register("beilpsched_cst_rsrc",   "create resource constraints");
675         t_cst_bundle = lc_timer_register("beilpsched_cst_bundle", "create bundle constraints");
676         LC_STOP_AND_RESET_TIMER(t_var);
677         LC_STOP_AND_RESET_TIMER(t_cst_assign);
678         LC_STOP_AND_RESET_TIMER(t_cst_prec);
679         LC_STOP_AND_RESET_TIMER(t_cst_rsrc);
680         LC_STOP_AND_RESET_TIMER(t_cst_bundle);
681 #endif /* WITH_LIBCORE */
682
683         DBG((env->dbg, 255, "\n\n\n=========================================\n"));
684         DBG((env->dbg, 255, "  ILP Scheduling for %+F\n", block));
685         DBG((env->dbg, 255, "=========================================\n\n"));
686
687         DBG((env->dbg, LEVEL_1, "Creating ILP Variables for nodes in %+F (%u interesting nodes)\n",
688                 block, ba->n_interesting_nodes));
689
690         if (ba->n_interesting_nodes > 1) {
691                 fact_var = ba->n_interesting_nodes < 25 ? 1.0 : 0.6;
692                 fact_cst = ba->n_interesting_nodes < 25 ? 0.8 : 0.6;
693
694                 lpp = new_lpp_userdef(
695                         "be ilp scheduling",
696                         lpp_minimize,
697                         (int)((double)(ba->n_interesting_nodes * ba->n_interesting_nodes) * fact_var) + 1,  /* num vars */
698                         (int)((double)(ba->n_interesting_nodes * ba->n_interesting_nodes) * fact_cst) + 20, /* num cst */
699                         1.2);                                                                               /* grow factor */
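                /*
                 * Note: every binary variable created below gets the objective
                 * coefficient (t + 1), so lpp_minimize effectively minimizes the
                 * sum of the chosen start steps and thus prefers early schedules.
                 */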
700                 obstack_init(&var_obst);
701
702                 lc_timer_push(t_var);
703                 foreach_linked_irns(ba->head_ilp_nodes, irn) {
704                         const be_execution_unit_t ***execunits = arch_isa_get_allowed_execution_units(env->arch_env->isa, irn);
705                         be_ilpsched_irn_t         *node;
706                         ilpsched_node_attr_t      *na;
707                         unsigned                  n_unit_types, tp_idx, unit_idx, cur_var, n_var, cur_unit;
708
709                         /* count number of available unit types for this node */
710                         for (n_unit_types = 0; execunits[n_unit_types]; ++n_unit_types)
711                                 /* just count */ ;
712
713                         node = get_ilpsched_irn(env, irn);
714                         na   = get_ilpsched_node_attr(node);
715
716                         na->n_unit_types = n_unit_types;
717                         na->type_info    = NEW_ARR_D(unit_type_info_t, &var_obst, n_unit_types);
718
719                         /* fill the type info array */
720                         for (tp_idx = 0; tp_idx < n_unit_types; ++tp_idx) {
721                                 for (unit_idx = 0; execunits[tp_idx][unit_idx]; ++unit_idx)
722                                         /* just count units of this type */;
723
724                                 na->type_info[tp_idx].tp      = execunits[tp_idx][0]->tp;
725                                 na->type_info[tp_idx].n_units = unit_idx;
726                         }
727
728                         /* allocate space for ilp variables */
729                         na->ilp_vars = NEW_ARR_D(int, &var_obst, n_unit_types * VALID_SCHED_INTERVAL(na));
730                         memset(na->ilp_vars, -1, n_unit_types * VALID_SCHED_INTERVAL(na) * sizeof(na->ilp_vars[0]));
731
732                         DBG((env->dbg, LEVEL_3, "\thandling %+F (asap %u, alap %u, unit types %u):\n",
733                                 irn, na->asap, na->alap, na->n_unit_types));
734
735                         cur_var = cur_unit = n_var = 0;
736                         /* create variables */
737                         for (tp_idx = 0; tp_idx < n_unit_types; ++tp_idx) {
738                                 for (t = na->asap - 1; t <= na->alap - 1; ++t) {
739                                         snprintf(buf, sizeof(buf), "n%u_%s_%u",
740                                                 get_irn_idx(irn), na->type_info[tp_idx].tp->name, t);
741                                         na->ilp_vars[cur_var++] = lpp_add_var(lpp, buf, lpp_binary, (double)(t + 1));
742                                         n_var++;
743                                         num_block_var++;
744                                         DBG((env->dbg, LEVEL_4, "\t\tcreated ILP variable %s\n", buf));
745                                 }
746                         }
747
748                         DB((env->dbg, LEVEL_3, "%u variables created\n", n_var));
749                         num_nodes++;
750                 }
751                 lc_timer_pop();
752                 DBG((env->dbg, LEVEL_1, "... %u variables for %u nodes created (%g sec)\n",
753                         num_block_var, num_nodes, lc_timer_elapsed_usec(t_var) / 1000000.0));
754
755                 /*
756                         1st:
757                         - the assignment constraints:
758                                 assure each node is executed once by exactly one (allowed) execution unit
759                         - the precedence constraints:
760                                 assure that no data dependencies are violated
761                 */
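                /*
                 * Sketch of the precedence constraint emitted below for a
                 * dependence m -> n and each time step t in the overlap of both
                 * scheduling intervals:
                 *     sum_{k, t' <= t} x_{nt'}^k  +  sum_{k, t' >= t} x_{mt'}^k  <= 1,
                 * i.e. n must not start at or before step t while its operand m
                 * starts at or after step t.
                 */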
762                 num_cst_assign = num_cst_prec = num_cst_resrc = num_cst_bundle = 0;
763                 DBG((env->dbg, LEVEL_1, "Creating constraints for nodes in %+F:\n", block));
764                 foreach_linked_irns(ba->head_ilp_nodes, irn) {
765                         int                  cst, tp_idx, i;
766                         unsigned             cur_var;
767                         be_ilpsched_irn_t    *node;
768                         ilpsched_node_attr_t *na;
769
770                         /* the assignment constraint */
771                         lc_timer_push(t_cst_assign);
772                         snprintf(buf, sizeof(buf), "assignment_cst_n%u", get_irn_idx(irn));
773                         cst = lpp_add_cst_uniq(lpp, buf, lpp_equal, 1.0);
774                         DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));
775                         num_cst_assign++;
776
777                         node    = get_ilpsched_irn(env, irn);
778                         na      = get_ilpsched_node_attr(node);
779                         cur_var = 0;
780
781                         lpp_set_factor_fast_bulk(lpp, cst, na->ilp_vars, na->n_unit_types * VALID_SCHED_INTERVAL(na), 1.0);
782                         lc_timer_pop();
783
784                         /* the precedence constraints */
785                         lc_timer_push(t_cst_prec);
786                         bs_block_irns = bitset_clear_all(bs_block_irns);
787                         for (i = get_irn_ins_or_deps(irn) - 1; i >= 0; --i) {
788                                 ir_node              *pred = skip_Proj(get_irn_in_or_dep(irn, i));
789                                 unsigned             t_low, t_high;
790                                 be_ilpsched_irn_t    *pred_node;
791                                 ilpsched_node_attr_t *pna;
792
793                                 if (be_is_Keep(pred))
794                                         pred = skip_Proj(get_irn_n(pred, 0));
795
796                                 if (is_Phi(pred) || block != get_nodes_block(pred) || is_NoMem(pred))
797                                         continue;
798
799                                 pred_node = get_ilpsched_irn(env, pred);
800                                 pna       = get_ilpsched_node_attr(pred_node);
801
802                                 assert(pna->asap > 0 && pna->alap >= pna->asap && "Invalid scheduling interval.");
803
804                                 if (! bitset_is_set(bs_block_irns, pna->block_idx))
805                                         bitset_set(bs_block_irns, pna->block_idx);
806                                 else
807                                         continue;
808
809                                 /* irn = n, pred = m */
810                                 t_low  = MAX(na->asap, pna->asap);
811                                 t_high = MIN(na->alap, pna->alap);
812                                 for (t = t_low - 1; t <= t_high - 1; ++t) {
813                                         int tn, tm, cur_idx;
814                                         int int_na       = (t >= na->asap - 1) ? t - na->asap + 2 : 0;
815                                         int int_pna      = (t < pna->alap)     ? pna->alap - t    : 0;
816                                         int *tmp_var_idx = NEW_ARR_F(int, int_na * na->n_unit_types + int_pna * pna->n_unit_types);
817
818                                         snprintf(buf, sizeof(buf), "precedence_n%u_n%u_%u", get_irn_idx(pred), get_irn_idx(irn), t);
819                                         cst = lpp_add_cst_uniq(lpp, buf, lpp_less, 1.0);
820                                         DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));
821                                         num_cst_prec++;
822
823                                         cur_idx = 0;
824
825                                         /* lpp_set_factor_fast_bulk needs variables sorted ascending by index */
826                                         if (na->ilp_vars[0] < pna->ilp_vars[0]) {
827                                                 /* node variables have smaller index than pred variables */
828                                                 for (tp_idx = na->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
829                                                         for (tn = na->asap - 1; tn <= t; ++tn) {
830                                                                 unsigned idx = ILPVAR_IDX(na, tp_idx, tn);
831                                                                 tmp_var_idx[cur_idx++] = na->ilp_vars[idx];
832                                                         }
833                                                 }
834
835                                                 for (tp_idx = pna->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
836                                                         for (tm = t; tm < pna->alap; ++tm) {
837                                                                 unsigned idx = ILPVAR_IDX(pna, tp_idx, tm);
838                                                                 tmp_var_idx[cur_idx++] = pna->ilp_vars[idx];
839                                                         }
840                                                 }
841                                         }
842                                         else {
843                                                 /* pred variables have smaller index than node variables */
844                                                 for (tp_idx = pna->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
845                                                         for (tm = t; tm < pna->alap; ++tm) {
846                                                                 unsigned idx = ILPVAR_IDX(pna, tp_idx, tm);
847                                                                 tmp_var_idx[cur_idx++] = pna->ilp_vars[idx];
848                                                         }
849                                                 }
850
851                                                 for (tp_idx = na->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
852                                                         for (tn = na->asap - 1; tn <= t; ++tn) {
853                                                                 unsigned idx = ILPVAR_IDX(na, tp_idx, tn);
854                                                                 tmp_var_idx[cur_idx++] = na->ilp_vars[idx];
855                                                         }
856                                                 }
857                                         }
858
859                                         lpp_set_factor_fast_bulk(lpp, cst, tmp_var_idx, cur_idx, 1.0);
860
861                                         DEL_ARR_F(tmp_var_idx);
862                                 }
863                         }
864                         lc_timer_pop();
865                 }
866                 DBG((env->dbg, LEVEL_1, "\t%u assignment constraints (%g sec)\n",
867                         num_cst_assign, lc_timer_elapsed_usec(t_cst_assign) / 1000000.0));
868                 DBG((env->dbg, LEVEL_1, "\t%u precedence constraints (%g sec)\n",
869                         num_cst_prec, lc_timer_elapsed_usec(t_cst_prec) / 1000000.0));
870
871                 /*
872                         2nd: the resource constraints:
873                         assure that for each time step no more instructions are scheduled
874                         to a unit type than there are units of that type available
875                 */
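                /*
                 * Example with made-up numbers: if the machine description lists
                 * 2 units of some type, then for every time step t the sum of all
                 * x_{nt}^k variables of that type is bounded by 2, so at most two
                 * nodes may be started on units of that type in the same step.
                 */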
876                 lc_timer_push(t_cst_rsrc);
877                 for (glob_type_idx = env->cpu->n_unit_types - 1; glob_type_idx >= 0; --glob_type_idx) {
878                         unsigned t;
879                         be_execution_unit_type_t *cur_tp = &env->cpu->unit_types[glob_type_idx];
880
881                         for (t = 0; t < ba->n_interesting_nodes; ++t) {
882                                 int cst;
883                                 int *tmp_var_idx = NEW_ARR_F(int, ba->n_interesting_nodes);
884                                 int cur_idx      = 0;
885
886                                 snprintf(buf, sizeof(buf), "resource_cst_%s_%u", cur_tp->name, t);
887                                 cst = lpp_add_cst_uniq(lpp, buf, lpp_less, (double)cur_tp->n_units);
888                                 DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));
889                                 num_cst_resrc++;
890
891                                 foreach_linked_irns(ba->head_ilp_nodes, irn) {
892                                         be_ilpsched_irn_t    *node = get_ilpsched_irn(env, irn);
893                                         ilpsched_node_attr_t *na   = get_ilpsched_node_attr(node);
894                                         int                  tp_idx;
895
896                                         tp_idx = is_valid_unit_type_for_node(cur_tp, node);
897
898                                         if (tp_idx >= 0 && t >= na->asap - 1 && t <= na->alap - 1) {
899                                                 int cur_var = ILPVAR_IDX(na, tp_idx, t);
900                                                 tmp_var_idx[cur_idx++] = na->ilp_vars[cur_var];
901                                         }
902                                 }
903
904                                 if (cur_idx)
905                                         lpp_set_factor_fast_bulk(lpp, cst, tmp_var_idx, cur_idx, 1.0);
906
907                                 DEL_ARR_F(tmp_var_idx);
908                         }
909                 }
910                 lc_timer_pop();
911                 DBG((env->dbg, LEVEL_1, "\t%u resource constraints (%g sec)\n",
912                         num_cst_resrc, lc_timer_elapsed_usec(t_cst_rsrc) / 1000000.0));
913
914                 /*
915                         3rd: the bundle constraints:
916                         assure that at most bundle_size * bundles_per_cycle instructions
917                         can be started at any time step.
918                 */
919                 lc_timer_push(t_cst_bundle);
920                 for (t = 0; t < ba->n_interesting_nodes; ++t) {
921                         int cst;
922                         int *tmp_var_idx = NEW_ARR_F(int, 0);
923
924                         snprintf(buf, sizeof(buf), "bundle_cst_%u", t);
925                         cst = lpp_add_cst_uniq(lpp, buf, lpp_less, (double)n_instr_max);
926                         DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));
927                         num_cst_bundle++;
928
929                         foreach_linked_irns(ba->head_ilp_nodes, irn) {
930                                 be_ilpsched_irn_t    *node;
931                                 ilpsched_node_attr_t *na;
932                                 int                  tp_idx;
933
934                                 node = get_ilpsched_irn(env, irn);
935                                 na   = get_ilpsched_node_attr(node);
936
937                                 if (t >= na->asap - 1 && t <= na->alap - 1) {
938                                         for (tp_idx = na->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
939                                                 int idx = ILPVAR_IDX(na, tp_idx, t);
940                                                 ARR_APP1(int, tmp_var_idx, na->ilp_vars[idx]);
941                                         }
942                                 }
943                         }
944
945                         if (ARR_LEN(tmp_var_idx) > 0)
946                                 lpp_set_factor_fast_bulk(lpp, cst, tmp_var_idx, ARR_LEN(tmp_var_idx), 1.0);
947
948                         DEL_ARR_F(tmp_var_idx);
949                 }
950                 lc_timer_pop();
951                 DBG((env->dbg, LEVEL_1, "\t%u bundle constraints (%g sec)\n",
952                         num_cst_bundle, lc_timer_elapsed_usec(t_cst_bundle) / 1000000.0));
953
954                 DBG((env->dbg, LEVEL_1, "ILP to solve: %u variables, %u constraints\n", lpp->var_next, lpp->cst_next));
955
956                 /* debug stuff, dump lpp when debugging is on  */
957                 DEBUG_ONLY(
958                         if (firm_dbg_get_mask(env->dbg) > 0) {
959                                 FILE *f;
960
961                                 snprintf(buf, sizeof(buf), "lpp_block_%lu.txt", get_irn_node_nr(block));
962                                 f = fopen(buf, "w");
963                                 lpp_dump_plain(lpp, f);
964                                 fclose(f);
965                                 snprintf(buf, sizeof(buf), "lpp_block_%lu.mps", get_irn_node_nr(block));
966                                 lpp_dump(lpp, buf);
967                         }
968                 );
969
970                 /* set solve time limit */
971                 lpp_set_time_limit(lpp, env->opts->time_limit);
972
973                 /* set logfile if requested */
974                 if (strlen(env->opts->log_file) > 0) {
975                         if (strcasecmp(env->opts->log_file, "stdout") == 0)
976                                 lpp_set_log(lpp, stdout);
977                         else if (strcasecmp(env->opts->log_file, "stderr") == 0)
978                                 lpp_set_log(lpp, stderr);
979                         else {
980                                 logfile = fopen(env->opts->log_file, "w");
981                                 if (! logfile)
982                                         fprintf(stderr, "Could not open logfile '%s'! Logging disabled.\n", env->opts->log_file);
983                                 else
984                                         lpp_set_log(lpp, logfile);
985                         }
986                 }
987
988                 /* solve the ILP */
989                 lpp_solve_net(lpp, env->main_env->options->ilp_server, env->main_env->options->ilp_solver);
990
991                 if (logfile)
992                         fclose(logfile);
993
994                 /* check for valid solution */
995                 if (! lpp_is_sol_valid(lpp)) {
996                         FILE *f;
997
998                         snprintf(buf, sizeof(buf), "lpp_block_%lu.assert.txt", get_irn_node_nr(block));
999                         f = fopen(buf, "w");
1000                         lpp_dump_plain(lpp, f);
1001                         fclose(f);
1002                         snprintf(buf, sizeof(buf), "lpp_block_%lu.assert.mps", get_irn_node_nr(block));
1003                         lpp_dump(lpp, buf);
1004                         dump_ir_block_graph(env->irg, "-assert");
1005
1006                         assert(0 && "ILP solution is not feasible!");
1007                 }
1008
1009                 DBG((env->dbg, LEVEL_1, "\nSolution:\n"));
1010                 DBG((env->dbg, LEVEL_1, "\tsend time: %g sec\n", lpp->send_time / 1000000.0));
1011                 DBG((env->dbg, LEVEL_1, "\treceive time: %g sec\n", lpp->recv_time / 1000000.0));
1012                 DBG((env->dbg, LEVEL_1, "\titerations: %d\n", lpp->iterations));
1013                 DBG((env->dbg, LEVEL_1, "\tsolution time: %g\n", lpp->sol_time));
1014                 DBG((env->dbg, LEVEL_1, "\tobjective function: %g\n", LPP_VALUE_IS_0(lpp->objval) ? 0.0 : lpp->objval));
1015                 DBG((env->dbg, LEVEL_1, "\tbest bound: %g\n", LPP_VALUE_IS_0(lpp->best_bound) ? 0.0 : lpp->best_bound));
1016
1017                 obstack_free(&var_obst, NULL);
1018         }
1019
1020         /* apply solution */
1021         apply_solution(env, lpp, block);
1022
1023         if (lpp)
1024                 free_lpp(lpp);
1025 }
1026
1027 /**
1028  * Perform ILP scheduling on the given irg.
1029  */
1030 void be_ilp_sched(const be_irg_t *birg) {
1031         be_ilpsched_env_t env;
1032         const char        *name = "be ilp scheduling";
1033
1034         FIRM_DBG_REGISTER(env.dbg, "firm.be.sched.ilp");
1035
1036         //firm_dbg_set_mask(env.dbg, 31);
1037
1038         env.irg        = birg->irg;
1039         env.main_env   = birg->main_env;
1040         env.alap_queue = new_waitq();
1041         env.arch_env   = birg->main_env->arch_env;
1042         env.cpu        = arch_isa_get_machine(birg->main_env->arch_env->isa);
1043         env.opts       = &ilp_opts;
1044         phase_init(&env.ph, name, env.irg, PHASE_DEFAULT_GROWTH, init_ilpsched_irn);
1045
1046         irg_walk_in_or_dep_graph(env.irg, NULL, build_block_idx, &env);
1047
1048         /*
1049                The block indices are completely built after the walk;
1050                now we can allocate the bitsets for all nodes
1051                (their size depends on the block indices).
1052         */
1053         phase_reinit_irn_data(&env.ph);
1054
1055         /* Collect all root nodes (having no user in their block) and calculate ASAP. */
1056         irg_walk_in_or_dep_blkwise_graph(env.irg, collect_alap_root_nodes, calculate_irn_asap, &env);
1057
1058         /* calculate ALAP and create variables */
1059         irg_block_walk_graph(env.irg, calculate_block_alap, create_ilp, &env);
1060
1061         DEBUG_ONLY(
1062                 if (firm_dbg_get_mask(env.dbg)) {
1063                         phase_stat_t stat;
1064                         phase_stat_t *stat_ptr = phase_stat(&env.ph, &stat);
1065
1066                         fprintf(stderr, "Phase used: %u bytes\n", stat_ptr->overall_bytes);
1067                 }
1068         );
1069
1070         /* free all allocated objects */
1071         del_waitq(env.alap_queue);
1072         phase_free(&env.ph);
1073 }
1074
1075 #ifdef WITH_LIBCORE
1076 /**
1077  * Register ILP scheduler options.
1078  */
1079 void ilpsched_register_options(lc_opt_entry_t *grp) {
1080         static int     run_once = 0;
1081         lc_opt_entry_t *sched_grp;
1082
1083         if (! run_once) {
1084                 run_once  = 1;
1085                 sched_grp = lc_opt_get_grp(grp, "ilpsched");
1086
1087                 lc_opt_add_table(sched_grp, ilpsched_option_table);
1088         }
1089 }
1090 #endif /* WITH_LIBCORE */
1091
1092 #else /* WITH_ILP */
1093
1094 static int some_picky_compiler_do_not_allow_empty_files;
1095
1096 #endif /* WITH_ILP */