finalized ilp scheduler
[libfirm] / ir / be / beilpsched.c
1 /**
2  * Scheduling algorithms.
3  * An ILP scheduler based on
4  * "ILP-based Instruction Scheduling for IA-64"
5  * by Daniel Kaestner and Sebastian Winkel
6  *
7  * @date   22.10.2005
8  * @author Christian Wuerdig
9  * @cvs-id $Id$
10  */
11 #ifdef HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14
15 #ifdef WITH_ILP
16
17 #include <math.h>
18
19 #include "irnode_t.h"
20 #include "irgwalk.h"
21 #include "irbitset.h"
22 #include "irphase_t.h"
23 #include "iredges.h"
24 #include "debug.h"
25 #include "pdeq.h"
26 #include "irtools.h"
27
28 #include <lpp/lpp.h>
29 #include <lpp/lpp_net.h>
30
31 #include "be.h"
32 #include "benode_t.h"
33
34 /* attributes for a node */
35 typedef struct _ilpsched_node_attr_t {
36         unsigned asap;                     /**< The ASAP scheduling control step */
37         unsigned alap;                     /**< The ALAP scheduling control step */
38         unsigned sched_point;              /**< the step in which the node is finally scheduled */
39         unsigned visit_idx;                /**< Index of the node having visited this node last */
40         unsigned block_idx : 31;           /**< A unique per block index */
41         unsigned enqueued  : 1;            /**< Node is already enqueued for ALAP calculation */
42         bitset_t *transitive_block_nodes;  /**< Set of transitive block nodes (predecessors
43                                                                                         for ASAP, successors for ALAP */
44         unsigned n_units;                  /**< number of allowed execution units */
45         const be_execution_unit_t **units; /**< list of allowed execution units */
46         int      *ilp_vars;                /**< the binary ilp variables x_{nt}^k assigned to this node
47                                                                                         (== 1 iff: node n is executed at step t on execunit k
48                                                                                         So we have: |ASAP(n) ... ALAP(n)| * |execunits| variables
49                                                                                 */
50 } ilpsched_node_attr_t;
51
52 /* attributes for a block */
53 typedef struct _ilpsched_block_attr_t {
54         unsigned block_last_idx;        /**< The highest node index in block so far */
55         unsigned n_interesting_nodes;   /**< The number of nodes interesting for scheduling */
56         waitq    *root_nodes;           /**< A queue of nodes having no user in current block */
57         ir_node  *head_ilp_nodes;       /**< A linked list of nodes which will contribute to ILP */
58 } ilpsched_block_attr_t;
59
60 typedef union _ilpsched_attr_ {
61         ilpsched_node_attr_t  node_attr;
62         ilpsched_block_attr_t block_attr;
63 } ilpsched_attr_t;
64
65 /* A irn for the phase and it's attributes (either node or block) */
66 typedef struct {
67         ir_node         *irn;
68         ilpsched_attr_t attr;
69 } be_ilpsched_irn_t;
70
71 typedef struct {
72         phase_t             ph;            /**< The phase */
73         ir_graph            *irg;          /**< The current irg */
74         waitq               *alap_queue;   /**< An queue of nodes waiting for final ALAP calculation */
75         const arch_env_t    *arch_env;
76         const be_main_env_t *main_env;
77         const be_machine_t  *cpu;          /**< the current abstract machine */
78         DEBUG_ONLY(firm_dbg_module_t *dbg);
79 } be_ilpsched_env_t;
80
81 #define get_ilpsched_irn(ilpsched_env, irn) (phase_get_or_set_irn_data(&(ilpsched_env)->ph, (irn)))
82 #define is_ilpsched_block(node)             (is_Block((node)->irn))
83 #define get_ilpsched_block_attr(block)      (&(block)->attr.block_attr)
84 #define get_ilpsched_node_attr(node)        (&(node)->attr.node_attr)
85
86 #define foreach_linked_irns(head, iter) for ((iter) = (head); (iter); (iter) = get_irn_link((iter)))
87
88 #define consider_for_sched(irn) \
89         (! (is_Block(irn) || is_Proj(irn) || is_Phi(irn) || be_is_Keep(irn) || is_NoMem(irn) || is_Jmp(irn)))
90
91 #define VALID_SCHED_INTERVAL(na) ((na)->alap - (na)->asap + 1)
92
93 #define ILPVAR_IDX(na, unit, control_step) \
94         ((unit) * VALID_SCHED_INTERVAL((na)) + (control_step) - (na)->asap + 1)
95
96 #define LPP_VALUE_IS_0(dbl) (fabs((dbl)) <= 1e-10)
97
98 /**
99  * In case there is no phase information for irn, initialize it.
100  */
101 static void *init_ilpsched_irn(phase_t *ph, ir_node *irn, void *old) {
102         be_ilpsched_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
103
104         if (res == old) {
105                 if (! is_Block(irn)) {
106                         ilpsched_node_attr_t *na = get_ilpsched_node_attr(res);
107
108                         if (! na->transitive_block_nodes) {
109                                 ir_node               *block      = get_nodes_block(irn);
110                                 be_ilpsched_irn_t     *block_node = phase_get_or_set_irn_data(ph, block);
111                                 ilpsched_block_attr_t *ba         = get_ilpsched_block_attr(block_node);
112
113                                 /* we are called after the block indicees have been build: create bitset */
114                                 na->transitive_block_nodes = bitset_obstack_alloc(phase_obst(ph), ba->block_last_idx);
115                         }
116                         else {
117                                 /* we are called from reinit block data: clear the bitset */
118                                 bitset_clear_all(na->transitive_block_nodes);
119                                 na->visit_idx = 0;
120                         }
121                 }
122                 return old;
123         }
124
125         res->irn = irn;
126
127         if (is_Block(irn)) {
128                 ilpsched_block_attr_t *ba = get_ilpsched_block_attr(res);
129
130                 ba->n_interesting_nodes = 0;
131                 ba->block_last_idx      = 0;
132                 ba->root_nodes          = new_waitq();
133                 ba->head_ilp_nodes      = NULL;
134         }
135         else {
136                 ilpsched_node_attr_t *na = get_ilpsched_node_attr(res);
137                 memset(na, 0, sizeof(*na));
138         }
139
140         return res;
141 }
142
143 /**
144  * Assign a per block unique number to each node.
145  */
146 static void build_block_idx(ir_node *irn, void *walk_env) {
147         be_ilpsched_env_t     *env = walk_env;
148         be_ilpsched_irn_t     *node, *block_node;
149         ilpsched_node_attr_t  *na;
150         ilpsched_block_attr_t *ba;
151
152         if (! consider_for_sched(irn))
153                 return;
154
155         node       = get_ilpsched_irn(env, irn);
156         na         = get_ilpsched_node_attr(node);
157         block_node = get_ilpsched_irn(env, get_nodes_block(irn));
158         ba         = get_ilpsched_block_attr(block_node);
159
160         na->block_idx = ba->block_last_idx++;
161 }
162
163 /**
164  * Add all nodes having no user in current block to last_nodes list.
165  */
166 static void collect_alap_root_nodes(ir_node *irn, void *walk_env) {
167         ir_node               *block;
168         const ir_edge_t       *edge;
169         be_ilpsched_irn_t     *block_node;
170         ilpsched_block_attr_t *ba;
171         be_ilpsched_env_t     *env           = walk_env;
172         int                   has_block_user = 0;
173         ir_edge_kind_t        ekind[2]       = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
174         int                   i, j;
175
176         if (! consider_for_sched(irn))
177                 return;
178
179         block = get_nodes_block(irn);
180
181         DBG((env->dbg, LEVEL_3, "%+F (%+F) is interesting, examining ... ", irn, block));
182
183         for (i = 0; i < 2; ++i) {
184                 foreach_out_edge_kind(irn, edge, ekind[i]) {
185                         ir_node *user = get_edge_src_irn(edge);
186
187                         if (is_Proj(user)) {
188                                 const ir_edge_t *user_edge;
189
190                                 if (get_irn_mode(user) == mode_X)
191                                         continue;
192
193                                 /* The ABI ensures, that there will be no ProjT nodes in the graph. */
194                                 for (j = 0; j < 2; ++j) {
195                                         foreach_out_edge_kind(user, user_edge, ekind[j]) {
196                                                 ir_node *real_user = get_edge_src_irn(user_edge);
197
198                                                 if (! is_Phi(real_user) && ! be_is_Keep(real_user) && get_nodes_block(real_user) == block) {
199                                                         has_block_user = 1;
200                                                         break;
201                                                 }
202                                         }
203                                 }
204
205                                 if (has_block_user)
206                                         break;
207                         }
208                         else if (is_Block(user)) {
209                                 continue;
210                         }
211                         else if (! is_Phi(user) && ! be_is_Keep(user) && get_nodes_block(user) == block) {
212                                 has_block_user = 1;
213                                 break;
214                         }
215                 }
216         }
217
218         block_node = get_ilpsched_irn(env, block);
219         ba         = get_ilpsched_block_attr(block_node);
220
221         ba->n_interesting_nodes++;
222
223         /* current irn has no user inside this block, add to queue */
224         if (! has_block_user) {
225                 DB((env->dbg, LEVEL_3, "root node\n"));
226                 waitq_put(ba->root_nodes, irn);
227         }
228         else {
229                 DB((env->dbg, LEVEL_3, "normal node\n"));
230         }
231 }
232
233 /**
234  * Calculate the ASAP scheduling step for current irn.
235  */
236 static void calculate_irn_asap(ir_node *irn, void *walk_env) {
237         be_ilpsched_irn_t *node;
238         be_ilpsched_env_t *env = walk_env;
239         int      i;
240         ir_node  *block;
241         ilpsched_node_attr_t *na;
242
243         /* These nodes are handled separate */
244         if (! consider_for_sched(irn))
245                 return;
246
247         DBG((env->dbg, LEVEL_2, "Calculating ASAP of node %+F\n", irn));
248
249         node  = get_ilpsched_irn(env, irn);
250         block = get_nodes_block(irn);
251         na    = get_ilpsched_node_attr(node);
252
253         /* accumulate all transitive predecessors of current node */
254         for (i = get_irn_ins_or_deps(irn) - 1; i >= 0; --i) {
255                 ir_node              *pred = skip_Proj(get_irn_in_or_dep(irn, i));
256                 be_ilpsched_irn_t    *pred_node;
257                 ilpsched_node_attr_t *pna;
258                 unsigned             idx;
259
260                 if (be_is_Keep(pred))
261                         pred = skip_Proj(get_irn_n(pred, 0));
262
263                 if (is_Phi(pred) || block != get_nodes_block(pred) || is_NoMem(pred))
264                         continue;
265
266                 pred_node = get_ilpsched_irn(env, pred);
267                 pna       = get_ilpsched_node_attr(pred_node);
268                 idx       = get_irn_idx(irn);
269
270                 assert(pna->asap && "missing ASAP of predecessor");
271
272                 /*
273                         We have not already visited this predecessor
274                         -> accumulate it's predecessors
275                 */
276                 if (pna->visit_idx != idx) {
277                         pna->visit_idx = idx;
278                         na->transitive_block_nodes = bitset_or(na->transitive_block_nodes, pna->transitive_block_nodes);
279                         DBG((env->dbg, LEVEL_3, "\taccumulating preds of %+F\n", pred));
280                 }
281         }
282
283         /* every node is it's own transitive predecessor in block */
284         bitset_set(na->transitive_block_nodes, na->block_idx);
285
286         /* asap = number of transitive predecessors in this block */
287         na->asap = bitset_popcnt(na->transitive_block_nodes);
288
289         DBG((env->dbg, LEVEL_2, "\tcalculated ASAP is %u\n", na->asap));
290 }
291
292 /**
293  * Accumulate the successors of all nodes from irn on upwards.
294  */
295 static void accumulate_succs(be_ilpsched_env_t *env, ir_node *irn) {
296         unsigned             i, n;
297         be_ilpsched_irn_t    *node  = get_ilpsched_irn(env, irn);
298         ilpsched_node_attr_t *na    = get_ilpsched_node_attr(node);
299         ir_node              *block = get_nodes_block(irn);
300         waitq                *wq    = new_waitq();
301
302         DBG((env->dbg, LEVEL_3, "\taccumulating succs of %+F\n", irn));
303
304         /* enqueue node for final alap calculation */
305         if (! na->enqueued) {
306                 be_ilpsched_irn_t     *block_node = get_ilpsched_irn(env, block);
307                 ilpsched_block_attr_t *ba         = get_ilpsched_block_attr(block_node);
308
309                 na->enqueued = 1;
310                 na->alap     = ba->n_interesting_nodes;
311                 waitq_put(env->alap_queue, node);
312
313                 set_irn_link(irn, ba->head_ilp_nodes);
314                 ba->head_ilp_nodes = irn;
315                 DBG((env->dbg, LEVEL_5, "\t\tlinked %+F to ilp nodes of %+F, attr %p\n", irn, block, ba));
316                 DBG((env->dbg, LEVEL_4, "\t\tenqueueing %+F for final ALAP calculation\n", irn));
317         }
318
319         for (i = 0, n = get_irn_ins_or_deps(irn); i < n; ++i) {
320                 ir_node              *pred = skip_Proj(get_irn_in_or_dep(irn, i));
321                 unsigned             idx;
322                 be_ilpsched_irn_t    *pred_node;
323                 ilpsched_node_attr_t *pna;
324
325                 if (be_is_Keep(pred))
326                         pred = skip_Proj(get_irn_n(pred, 0));
327
328                 if (is_Phi(pred) || block != get_nodes_block(pred) || is_NoMem(pred))
329                         continue;
330
331                 pred_node = get_ilpsched_irn(env, pred);
332                 pna       = get_ilpsched_node_attr(pred_node);
333                 idx       = get_irn_idx(irn);
334
335                 /* accumulate the successors */
336                 if (pna->visit_idx != idx) {
337                         pna->visit_idx = idx;
338                         pna->transitive_block_nodes = bitset_or(pna->transitive_block_nodes, na->transitive_block_nodes);
339
340                         /* set current node as successor */
341                         bitset_set(pna->transitive_block_nodes, na->block_idx);
342                         waitq_put(wq, pred);
343
344                         DBG((env->dbg, LEVEL_3, "\taccumulating succs of %+F to %+F\n", irn, pred));
345                 }
346         }
347
348         /* process all predecessors */
349         while (! waitq_empty(wq)) {
350                 accumulate_succs(env, waitq_get(wq));
351         }
352
353         del_waitq(wq);
354 }
355
356 /**
357  * Calculate the ALAP scheduling step of all irns in current block.
358  * Depends on ASAP beeing calculated.
359  */
360 static void calculate_block_alap(ir_node *block, void *walk_env) {
361         be_ilpsched_env_t     *env        = walk_env;
362         be_ilpsched_irn_t     *block_node = get_ilpsched_irn(env, block);
363         ilpsched_block_attr_t *ba         = get_ilpsched_block_attr(block_node);
364
365         assert(is_Block(block));
366
367         DBG((env->dbg, LEVEL_2, "Calculating ALAP for nodes in %+F (%u nodes)\n", block, ba->n_interesting_nodes));
368
369         /* TODO: Might be faster to use out edges and call phase_reinit_single_irn_data */
370         phase_reinit_block_irn_data(&env->ph, block);
371
372         /* calculate the alap of all nodes, starting at collected roots upwards */
373         while (! waitq_empty(ba->root_nodes)) {
374                 accumulate_succs(env, waitq_get(ba->root_nodes));
375         }
376
377         /* we don't need it anymore */
378         del_waitq(ba->root_nodes);
379         ba->root_nodes = NULL;
380
381         /* all interesting nodes should have their successors accumulated now */
382         while (! waitq_empty(env->alap_queue)) {
383                 be_ilpsched_irn_t    *node = waitq_get(env->alap_queue);
384                 ilpsched_node_attr_t *na   = get_ilpsched_node_attr(node);
385
386                 na->alap -= bitset_popcnt(na->transitive_block_nodes);
387                 DBG((env->dbg, LEVEL_2, "\tALAP of %+F is %u (%u succs)\n", node->irn, na->alap, bitset_popcnt(na->transitive_block_nodes)));
388         }
389 }
390
391 /**
392  * Check if node can be executed on given unit.
393  */
394 static INLINE int is_valid_unit_for_node(const be_execution_unit_t *unit, be_ilpsched_irn_t *node) {
395         int                  i;
396         ilpsched_node_attr_t *na = get_ilpsched_node_attr(node);
397
398         for (i = na->n_units - 1; i >= 0; --i) {
399                 if (na->units[i] == unit)
400                         return i;
401         }
402
403         return -1;
404 }
405
406 /**
407  * Create the ilp (add variables, build constraints, solve, build schedule from solution).
408  */
409 static void create_ilp(ir_node *block, void *walk_env) {
410         be_ilpsched_env_t     *env          = walk_env;
411         be_ilpsched_irn_t     *block_node   = get_ilpsched_irn(env, block);
412         ilpsched_block_attr_t *ba           = get_ilpsched_block_attr(block_node);
413         unsigned              num_block_var = 0;
414         unsigned              num_nodes     = 0;
415         unsigned              n_instr_max   = env->cpu->bundle_size * env->cpu->bundels_per_cycle;
416         unsigned              num_cst_assign, num_cst_prec, num_cst_resrc, num_cst_bundle;
417         unsigned              t;
418         int                   glob_type_idx, glob_unit_idx;
419         ir_node               *irn;
420         char                  buf[1024];
421         lpp_t                 *lpp;
422         struct obstack        var_obst;
423
424         DBG((env->dbg, 255, "\n\n\n=========================================\n"));
425         DBG((env->dbg, 255, "  ILP Scheduling for %+F\n", block));
426         DBG((env->dbg, 255, "=========================================\n\n"));
427
428         DBG((env->dbg, LEVEL_1, "Creating ILP Variables for nodes in %+F (%u interesting nodes)\n",
429                 block, ba->n_interesting_nodes));
430
431         lpp = new_lpp("be ilp scheduling", lpp_minimize);
432         obstack_init(&var_obst);
433
434         foreach_linked_irns(ba->head_ilp_nodes, irn) {
435                 const be_execution_unit_t ***execunits = arch_isa_get_allowed_execution_units(env->arch_env->isa, irn);
436                 be_ilpsched_irn_t         *node;
437                 ilpsched_node_attr_t      *na;
438                 unsigned                  n_units, tp_idx, unit_idx, cur_var, n_var, cur_unit;
439
440                 /* count number of available units for this node */
441                 for (n_units = tp_idx = 0; execunits[tp_idx]; ++tp_idx)
442                         for (unit_idx = 0; execunits[tp_idx][unit_idx]; ++unit_idx)
443                                 n_units++;
444
445         node = get_ilpsched_irn(env, irn);
446         na   = get_ilpsched_node_attr(node);
447
448                 na->n_units = n_units;
449                 na->units   = phase_alloc(&env->ph, n_units * sizeof(na->units[0]));
450
451                 /* allocate space for ilp variables */
452                 na->ilp_vars = NEW_ARR_D(int, &var_obst, n_units * VALID_SCHED_INTERVAL(na));
453                 memset(na->ilp_vars, -1, n_units * VALID_SCHED_INTERVAL(na) * sizeof(na->ilp_vars[0]));
454
455                 DBG((env->dbg, LEVEL_3, "\thandling %+F (asap %u, alap %u, units %u):\n",
456                         irn, na->asap, na->alap, na->n_units));
457
458                 cur_var = cur_unit = n_var = 0;
459                 /* create variables */
460                 for (tp_idx = 0; execunits[tp_idx]; ++tp_idx) {
461                         for (unit_idx = 0; execunits[tp_idx][unit_idx]; ++unit_idx) {
462                                 na->units[cur_unit++] = execunits[tp_idx][unit_idx];
463
464                                 for (t = na->asap - 1; t <= na->alap - 1; ++t) {
465                                         snprintf(buf, sizeof(buf), "n%u_%s_%u",
466                                                 get_irn_idx(irn), execunits[tp_idx][unit_idx]->name, t);
467                                         na->ilp_vars[cur_var++] = lpp_add_var(lpp, buf, lpp_binary, (double)(t + 1));
468                                         n_var++;
469                                         num_block_var++;
470                                         DBG((env->dbg, LEVEL_4, "\t\tcreated ILP variable %s\n", buf));
471                                 }
472                         }
473                 }
474
475                 DB((env->dbg, LEVEL_3, "%u variables created\n", n_var));
476                 num_nodes++;
477         }
478
479         DBG((env->dbg, LEVEL_1, "... %u variables for %u nodes created\n", num_block_var, num_nodes));
480
481         /*
482                 1st:
483                 - the assignment constraints:
484                         assure each node is executed once by exactly one (allowed) execution unit
485                 - the precedence constraints:
486                         assure that no data dependecies are violated
487         */
488         num_cst_assign = num_cst_prec = num_cst_resrc = num_cst_bundle = 0;
489         DBG((env->dbg, LEVEL_1, "Creating constraints for nodes in %+F:\n", block));
490         foreach_linked_irns(ba->head_ilp_nodes, irn) {
491                 int                  cst, unit_idx, i;
492                 unsigned             cur_var;
493                 be_ilpsched_irn_t    *node;
494                 ilpsched_node_attr_t *na;
495
496                 /* the assignment constraint */
497                 snprintf(buf, sizeof(buf), "assignment_cst_n%u", get_irn_idx(irn));
498                 cst = lpp_add_cst_uniq(lpp, buf, lpp_equal, 1.0);
499                 DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));
500                 num_cst_assign++;
501
502                 node    = get_ilpsched_irn(env, irn);
503                 na      = get_ilpsched_node_attr(node);
504                 cur_var = 0;
505
506                 for (unit_idx = na->n_units - 1; unit_idx >= 0; --unit_idx) {
507                         for (t = na->asap - 1; t <= na->alap - 1; ++t) {
508                                 lpp_set_factor_fast(lpp, cst, na->ilp_vars[cur_var++], 1.0);
509                         }
510                 }
511
512                 /* the precedence constraints */
513                 for (i = get_irn_ins_or_deps(irn) - 1; i >= 0; --i) {
514                         ir_node              *pred = skip_Proj(get_irn_in_or_dep(irn, i));
515                         unsigned             t_low, t_high;
516                         be_ilpsched_irn_t    *pred_node;
517                         ilpsched_node_attr_t *pna;
518
519                         if (be_is_Keep(pred))
520                                 pred = skip_Proj(get_irn_n(pred, 0));
521
522                         if (is_Phi(pred) || block != get_nodes_block(pred) || is_NoMem(pred))
523                                 continue;
524
525                         pred_node = get_ilpsched_irn(env, pred);
526                         pna       = get_ilpsched_node_attr(pred_node);
527
528                         assert(pna->asap > 0 && pna->alap >= pna->asap && "Invalid scheduling interval.");
529
530                         /* irn = n, pred = m */
531                         t_low  = MAX(na->asap, pna->asap);
532                         t_high = MIN(na->alap, pna->alap);
533                         for (t = t_low - 1; t <= t_high - 1; ++t) {
534                                 int tn, tm;
535
536                                 snprintf(buf, sizeof(buf), "precedence_n%u_n%u_%u", get_irn_idx(pred), get_irn_idx(irn), t);
537                                 cst = lpp_add_cst(lpp, buf, lpp_less, 1.0);
538                                 DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));
539                                 num_cst_prec++;
540
541                                 for (unit_idx = na->n_units - 1; unit_idx >= 0; --unit_idx) {
542                                         for (tn = na->asap; tn <= t; ++tn) {
543                                                 unsigned idx = ILPVAR_IDX(na, unit_idx, tn);
544                                                 lpp_set_factor_fast(lpp, cst, na->ilp_vars[idx], 1.0);
545                                         }
546                                 }
547
548                                 for (unit_idx = pna->n_units - 1; unit_idx >= 0; --unit_idx) {
549                                         for (tm = t; tm < pna->alap; ++tm) {
550                                                 unsigned idx = ILPVAR_IDX(pna, unit_idx, tm);
551                                                 lpp_set_factor_fast(lpp, cst, pna->ilp_vars[idx], 1.0);
552                                         }
553                                 }
554                         }
555                 }
556         }
557         DBG((env->dbg, LEVEL_1, "\t%u assignement constraints\n", num_cst_assign));
558         DBG((env->dbg, LEVEL_1, "\t%u precedence constraints\n", num_cst_prec));
559
560         /*
561                 2nd: the ressource constraints:
562                 assure that foreach timestep no more than one instruction is scheduled to same unit
563         */
564         for (glob_type_idx = env->cpu->n_unit_types - 1; glob_type_idx >= 0; --glob_type_idx) {
565                 for (glob_unit_idx = env->cpu->unit_types[glob_type_idx].n_units - 1; glob_unit_idx >= 0; --glob_unit_idx) {
566                         unsigned t;
567                         be_execution_unit_t *cur_unit = &env->cpu->unit_types[glob_type_idx].units[glob_unit_idx];
568
569                         for (t = 0; t < ba->n_interesting_nodes; ++t) {
570                                 int cst;
571
572                                 snprintf(buf, sizeof(buf), "resource_cst_%s_%u", cur_unit->name, t);
573                                 cst = lpp_add_cst_uniq(lpp, buf, lpp_less, 1.0);
574                                 DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));
575                                 num_cst_resrc++;
576
577                                 foreach_linked_irns(ba->head_ilp_nodes, irn) {
578                                         be_ilpsched_irn_t    *node = get_ilpsched_irn(env, irn);
579                                         ilpsched_node_attr_t *na   = get_ilpsched_node_attr(node);
580                                         int                  unit_idx;
581
582                                         unit_idx = is_valid_unit_for_node(cur_unit, node);
583
584                                         if (unit_idx >= 0 && t >= na->asap - 1 && t <= na->alap - 1) {
585                                                 int cur_var = ILPVAR_IDX(na, unit_idx, t);
586                                                 lpp_set_factor_fast(lpp, cst, na->ilp_vars[cur_var], 1.0);
587                                         }
588                                 }
589                         }
590                 }
591         }
592         DBG((env->dbg, LEVEL_1, "\t%u resource constraints\n", num_cst_resrc));
593
594         /*
595                 3rd: bundle constraints:
596                 assure, at most bundle_size * bundles_per_cycle instructions
597                 can be started at a certain point.
598         */
599         for (t = 0; t < ba->n_interesting_nodes; ++t) {
600                 int cst;
601
602                 snprintf(buf, sizeof(buf), "bundle_cst_%u", t);
603                 cst = lpp_add_cst_uniq(lpp, buf, lpp_less, (double)n_instr_max);
604                 DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));
605                 num_cst_bundle++;
606
607                 foreach_linked_irns(ba->head_ilp_nodes, irn) {
608                         be_ilpsched_irn_t    *node;
609                         ilpsched_node_attr_t *na;
610                         int                  unit_idx;
611
612                         node = get_ilpsched_irn(env, irn);
613                         na   = get_ilpsched_node_attr(node);
614
615                         if (t >= na->asap - 1 && t <= na->alap - 1) {
616                                 for (unit_idx = na->n_units - 1; unit_idx >= 0; --unit_idx) {
617                                         int idx = ILPVAR_IDX(na, unit_idx, t);
618                                         lpp_set_factor_fast(lpp, cst, na->ilp_vars[idx], 1.0);
619                                 }
620                         }
621                 }
622         }
623         DBG((env->dbg, LEVEL_1, "\t%u bundle constraints\n", num_cst_bundle));
624
625         DBG((env->dbg, LEVEL_1, "ILP to solve: %u variables, %u constraints\n", lpp->var_next, lpp->cst_next));
626
627         DEBUG_ONLY(
628                 if (firm_dbg_get_mask(env->dbg) & 64) {
629                         FILE *f;
630
631                         snprintf(buf, sizeof(buf), "lpp_block_%lu.txt", get_irn_node_nr(block));
632                         f = fopen(buf, "w");
633                         lpp_dump_plain(lpp, f);
634                         fclose(f);
635                         snprintf(buf, sizeof(buf), "lpp_block_%lu.mps", get_irn_node_nr(block));
636                         lpp_dump(lpp, "buf");
637                 }
638         );
639
640         lpp_set_time_limit(lpp, 3600);
641         lpp_set_log(lpp, stdout);
642
643         lpp_solve_net(lpp, env->main_env->options->ilp_server, env->main_env->options->ilp_solver);
644         if (! lpp_is_sol_valid(lpp)) {
645                 FILE *f;
646
647                 snprintf(buf, sizeof(buf), "lpp_block_%lu.assert.txt", get_irn_node_nr(block));
648                 f = fopen(buf, "w");
649                 lpp_dump_plain(lpp, f);
650                 fclose(f);
651                 snprintf(buf, sizeof(buf), "lpp_block_%lu.assert.mps", get_irn_node_nr(block));
652                 lpp_dump(lpp, buf);
653                 dump_ir_block_graph(env->irg, "-assert");
654
655                 assert(0 && "ILP solution is not feasible!");
656         }
657
658         DBG((env->dbg, LEVEL_1, "\nSolution:\n"));
659         DBG((env->dbg, LEVEL_1, "\titerations: %d\n", lpp->iterations));
660         DBG((env->dbg, LEVEL_1, "\tsolution time: %g\n", lpp->sol_time));
661         DBG((env->dbg, LEVEL_1, "\tobjective function: %g\n", LPP_VALUE_IS_0(lpp->objval) ? 0.0 : lpp->objval));
662         DBG((env->dbg, LEVEL_1, "\tbest bound: %g\n", LPP_VALUE_IS_0(lpp->best_bound) ? 0.0 : lpp->best_bound));
663
664         /* apply solution */
665         foreach_linked_irns(ba->head_ilp_nodes, irn) {
666                 be_ilpsched_irn_t    *node;
667                 ilpsched_node_attr_t *na;
668                 int                  unit_idx;
669                 unsigned             cur_var;
670
671                 node    = get_ilpsched_irn(env, irn);
672                 na      = get_ilpsched_node_attr(node);
673                 cur_var = 0;
674
675                 for (unit_idx = na->n_units - 1; unit_idx >= 0; --unit_idx) {
676                         for (t = na->asap - 1; t <= na->alap - 1; ++t) {
677                                 double val = lpp_get_var_sol(lpp, na->ilp_vars[cur_var++]);
678                                 if (! LPP_VALUE_IS_0(val)) {
679                                         DBG((env->dbg, LEVEL_1, "Schedpoint of %+F is %u at unit %s\n", irn, t, na->units[unit_idx]->name));
680                                 }
681                         }
682                 }
683         }
684
685         obstack_free(&var_obst, NULL);
686         free_lpp(lpp);
687 }
688
689 /**
690  * Perform ILP scheduling on the given irg.
691  */
692 void be_ilp_sched(const be_irg_t *birg) {
693         be_ilpsched_env_t env;
694         const char        *name = "be ilp scheduling";
695
696         FIRM_DBG_REGISTER(env.dbg, "firm.be.sched.ilp");
697
698         //firm_dbg_set_mask(env.dbg, 31);
699
700         env.irg        = birg->irg;
701         env.main_env   = birg->main_env;
702         env.alap_queue = new_waitq();
703         env.arch_env   = birg->main_env->arch_env;
704         env.cpu        = arch_isa_get_machine(birg->main_env->arch_env->isa);
705         phase_init(&env.ph, name, env.irg, PHASE_DEFAULT_GROWTH, init_ilpsched_irn);
706
707         irg_walk_in_or_dep_graph(env.irg, NULL, build_block_idx, &env);
708
709         /*
710                 The block indicees are completely build after the walk,
711                 now we can allocate the bitsets (size depends on block indicees)
712                 for all nodes.
713         */
714         phase_reinit_irn_data(&env.ph);
715
716         /* Collect all root nodes (having no user in their block) and calculate ASAP. */
717         irg_walk_in_or_dep_blkwise_graph(env.irg, collect_alap_root_nodes, calculate_irn_asap, &env);
718
719         /* calculate ALAP and create variables */
720         irg_block_walk_graph(env.irg, calculate_block_alap, create_ilp, &env);
721
722         DEBUG_ONLY(
723                 if (firm_dbg_get_mask(env.dbg)) {
724                         phase_stat_t stat;
725                         phase_stat_t *stat_ptr = phase_stat(&env.ph, &stat);
726
727                         fprintf(stderr, "Phase used: %u bytes\n", stat_ptr->overall_bytes);
728                 }
729         );
730
731         /* free all allocated object */
732         del_waitq(env.alap_queue);
733         phase_free(&env.ph);
734 }
735
736 #else /* WITH_ILP */
737
738 static int some_picky_compiler_do_not_allow_empty_files;
739
740 #endif /* WITH_ILP */