07a2a9dd3925a3c1d04a7f4fecbcd541174f27f8
[libfirm] / ir / be / beilpsched.c
1 /**
2  * Scheduling algorithms.
3  * An ILP scheduler based on
4  * "ILP-based Instruction Scheduling for IA-64"
5  * by Daniel Kaestner and Sebastian Winkel
6  *
7  * @date   22.10.2005
8  * @author Christian Wuerdig
9  * @cvs-id $Id$
10  */
11 #ifdef HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14
15 #ifdef WITH_ILP
16
17 #include <math.h>
18
19 #ifndef _WIN32
20 #include <strings.h>
21 #endif /* _WIN32 */
22
23 #include "irnode_t.h"
24 #include "irgwalk.h"
25 #include "irbitset.h"
26 #include "irphase_t.h"
27 #include "iredges.h"
28 #include "debug.h"
29 #include "pdeq.h"
30 #include "irtools.h"
31 #include "irdump.h"
32
33 #include <lpp/lpp.h>
34 #include <lpp/lpp_net.h>
35
36 #ifdef WITH_LIBCORE
37 #include <libcore/lc_opts.h>
38 #include <libcore/lc_opts_enum.h>
39 #include <libcore/lc_timing.h>
40 #endif /* WITH_LIBCORE */
41
42 #include "be.h"
43 #include "benode_t.h"
44
45 typedef struct _ilpsched_options_t {
46         unsigned time_limit;
47         char     log_file[1024];
48 } ilpsched_options_t;
49
50 typedef struct _unit_type_info_t {
51         int                            n_units;
52         const be_execution_unit_type_t *tp;
53 } unit_type_info_t;
54
55 /* attributes for a node */
56 typedef struct _ilpsched_node_attr_t {
57         unsigned asap;                     /**< The ASAP scheduling control step */
58         unsigned alap;                     /**< The ALAP scheduling control step */
59         unsigned sched_point;              /**< the step in which the node is finally scheduled */
60         unsigned visit_idx;                /**< Index of the node having visited this node last */
61         unsigned block_idx : 31;           /**< A unique per block index */
62         unsigned enqueued  : 1;            /**< Node is already enqueued for ALAP calculation */
63         bitset_t *transitive_block_nodes;  /**< Set of transitive block nodes (predecessors
64                                                                                         for ASAP, successors for ALAP */
65         unsigned n_unit_types;             /**< number of allowed execution unit types */
66         unit_type_info_t *type_info;       /**< list of allowed execution unit types */
67         int      *ilp_vars;                /**< the binary ilp variables x_{nt}^k assigned to this node
68                                                                                         (== 1 iff: node n is executed at step t on execunit k
69                                                                                         So we have: |ASAP(n) ... ALAP(n)| * |execunits| variables
70                                                                                 */
71 } ilpsched_node_attr_t;
72
73 /* attributes for a block */
74 typedef struct _ilpsched_block_attr_t {
75         unsigned block_last_idx;        /**< The highest node index in block so far */
76         unsigned n_interesting_nodes;   /**< The number of nodes interesting for scheduling */
77         waitq    *root_nodes;           /**< A queue of nodes having no user in current block */
78         ir_node  *head_ilp_nodes;       /**< A linked list of nodes which will contribute to ILP */
79 } ilpsched_block_attr_t;
80
81 typedef union _ilpsched_attr_ {
82         ilpsched_node_attr_t  node_attr;
83         ilpsched_block_attr_t block_attr;
84 } ilpsched_attr_t;
85
86 /* A irn for the phase and it's attributes (either node or block) */
87 typedef struct {
88         ir_node         *irn;
89         ilpsched_attr_t attr;
90 } be_ilpsched_irn_t;
91
92 typedef struct {
93         phase_t             ph;            /**< The phase */
94         ir_graph            *irg;          /**< The current irg */
95         waitq               *alap_queue;   /**< An queue of nodes waiting for final ALAP calculation */
96         const arch_env_t    *arch_env;
97         const be_main_env_t *main_env;
98         const be_machine_t  *cpu;          /**< the current abstract machine */
99         ilpsched_options_t  *opts;         /**< the ilp options for current irg */
100         DEBUG_ONLY(firm_dbg_module_t *dbg);
101 } be_ilpsched_env_t;
102
103 #define get_ilpsched_irn(ilpsched_env, irn) (phase_get_or_set_irn_data(&(ilpsched_env)->ph, (irn)))
104 #define is_ilpsched_block(node)             (is_Block((node)->irn))
105 #define get_ilpsched_block_attr(block)      (&(block)->attr.block_attr)
106 #define get_ilpsched_node_attr(node)        (&(node)->attr.node_attr)
107
108 #define foreach_linked_irns(head, iter) for ((iter) = (head); (iter); (iter) = get_irn_link((iter)))
109
110 #define consider_for_sched(irn) \
111         (! (is_Block(irn) || is_Proj(irn) || is_Phi(irn) || be_is_Keep(irn) || is_NoMem(irn) || is_Jmp(irn)))
112
113 #define VALID_SCHED_INTERVAL(na) ((na)->alap - (na)->asap + 1)
114
115 #define ILPVAR_IDX(na, unit, control_step) \
116         ((unit) * VALID_SCHED_INTERVAL((na)) + (control_step) - (na)->asap + 1)
117
118 #define LPP_VALUE_IS_0(dbl) (fabs((dbl)) <= 1e-10)
119
120 static ilpsched_options_t ilp_opts = {
121         120,   /* 120 sec per block time limit */
122         ""     /* no log file */
123 };
124
125 #ifdef WITH_LIBCORE
126 static const lc_opt_table_entry_t ilpsched_option_table[] = {
127         LC_OPT_ENT_INT("time_limit", "ILP time limit per block", &ilp_opts.time_limit),
128         LC_OPT_ENT_STR("lpp_log",    "LPP logfile (stderr and stdout are supported)", ilp_opts.log_file, sizeof(ilp_opts.log_file)),
129         { NULL }
130 };
131 #endif /* WITH_LIBCORE */
132
133 /**
134  * In case there is no phase information for irn, initialize it.
135  */
136 static void *init_ilpsched_irn(phase_t *ph, ir_node *irn, void *old) {
137         be_ilpsched_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
138
139         if (res == old) {
140                 if (! is_Block(irn)) {
141                         ilpsched_node_attr_t *na = get_ilpsched_node_attr(res);
142
143                         if (! na->transitive_block_nodes) {
144                                 ir_node               *block      = get_nodes_block(irn);
145                                 be_ilpsched_irn_t     *block_node = phase_get_or_set_irn_data(ph, block);
146                                 ilpsched_block_attr_t *ba         = get_ilpsched_block_attr(block_node);
147
148                                 /* we are called after the block indicees have been build: create bitset */
149                                 na->transitive_block_nodes = bitset_obstack_alloc(phase_obst(ph), ba->block_last_idx);
150                         }
151                         else {
152                                 /* we are called from reinit block data: clear the bitset */
153                                 bitset_clear_all(na->transitive_block_nodes);
154                                 na->visit_idx = 0;
155                         }
156                 }
157                 return old;
158         }
159
160         res->irn = irn;
161
162         if (is_Block(irn)) {
163                 ilpsched_block_attr_t *ba = get_ilpsched_block_attr(res);
164
165                 ba->n_interesting_nodes = 0;
166                 ba->block_last_idx      = 0;
167                 ba->root_nodes          = new_waitq();
168                 ba->head_ilp_nodes      = NULL;
169         }
170         else {
171                 ilpsched_node_attr_t *na = get_ilpsched_node_attr(res);
172                 memset(na, 0, sizeof(*na));
173         }
174
175         return res;
176 }
177
178 /**
179  * Assign a per block unique number to each node.
180  */
181 static void build_block_idx(ir_node *irn, void *walk_env) {
182         be_ilpsched_env_t     *env = walk_env;
183         be_ilpsched_irn_t     *node, *block_node;
184         ilpsched_node_attr_t  *na;
185         ilpsched_block_attr_t *ba;
186
187         if (! consider_for_sched(irn))
188                 return;
189
190         node       = get_ilpsched_irn(env, irn);
191         na         = get_ilpsched_node_attr(node);
192         block_node = get_ilpsched_irn(env, get_nodes_block(irn));
193         ba         = get_ilpsched_block_attr(block_node);
194
195         na->block_idx = ba->block_last_idx++;
196 }
197
198 /**
199  * Add all nodes having no user in current block to last_nodes list.
200  */
201 static void collect_alap_root_nodes(ir_node *irn, void *walk_env) {
202         ir_node               *block;
203         const ir_edge_t       *edge;
204         be_ilpsched_irn_t     *block_node;
205         ilpsched_block_attr_t *ba;
206         be_ilpsched_env_t     *env           = walk_env;
207         int                   has_block_user = 0;
208         ir_edge_kind_t        ekind[2]       = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
209         int                   i, j;
210
211         if (! consider_for_sched(irn))
212                 return;
213
214         block = get_nodes_block(irn);
215
216         DBG((env->dbg, LEVEL_3, "%+F (%+F) is interesting, examining ... ", irn, block));
217
218         for (i = 0; i < 2; ++i) {
219                 foreach_out_edge_kind(irn, edge, ekind[i]) {
220                         ir_node *user = get_edge_src_irn(edge);
221
222                         if (is_Proj(user)) {
223                                 const ir_edge_t *user_edge;
224
225                                 if (get_irn_mode(user) == mode_X)
226                                         continue;
227
228                                 /* The ABI ensures, that there will be no ProjT nodes in the graph. */
229                                 for (j = 0; j < 2; ++j) {
230                                         foreach_out_edge_kind(user, user_edge, ekind[j]) {
231                                                 ir_node *real_user = get_edge_src_irn(user_edge);
232
233                                                 if (! is_Phi(real_user) && ! be_is_Keep(real_user) && get_nodes_block(real_user) == block) {
234                                                         has_block_user = 1;
235                                                         break;
236                                                 }
237                                         }
238                                 }
239
240                                 if (has_block_user)
241                                         break;
242                         }
243                         else if (is_Block(user)) {
244                                 continue;
245                         }
246                         else if (! is_Phi(user) && ! be_is_Keep(user) && get_nodes_block(user) == block) {
247                                 has_block_user = 1;
248                                 break;
249                         }
250                 }
251         }
252
253         block_node = get_ilpsched_irn(env, block);
254         ba         = get_ilpsched_block_attr(block_node);
255
256         ba->n_interesting_nodes++;
257
258         /* current irn has no user inside this block, add to queue */
259         if (! has_block_user) {
260                 DB((env->dbg, LEVEL_3, "root node\n"));
261                 waitq_put(ba->root_nodes, irn);
262         }
263         else {
264                 DB((env->dbg, LEVEL_3, "normal node\n"));
265         }
266 }
267
268 /**
269  * Calculate the ASAP scheduling step for current irn.
270  */
271 static void calculate_irn_asap(ir_node *irn, void *walk_env) {
272         be_ilpsched_irn_t *node;
273         be_ilpsched_env_t *env = walk_env;
274         int      i;
275         ir_node  *block;
276         ilpsched_node_attr_t *na;
277
278         /* These nodes are handled separate */
279         if (! consider_for_sched(irn))
280                 return;
281
282         DBG((env->dbg, LEVEL_2, "Calculating ASAP of node %+F\n", irn));
283
284         node  = get_ilpsched_irn(env, irn);
285         block = get_nodes_block(irn);
286         na    = get_ilpsched_node_attr(node);
287
288         /* accumulate all transitive predecessors of current node */
289         for (i = get_irn_ins_or_deps(irn) - 1; i >= 0; --i) {
290                 ir_node              *pred = skip_Proj(get_irn_in_or_dep(irn, i));
291                 be_ilpsched_irn_t    *pred_node;
292                 ilpsched_node_attr_t *pna;
293                 unsigned             idx;
294
295                 if (be_is_Keep(pred))
296                         pred = skip_Proj(get_irn_n(pred, 0));
297
298                 if (is_Phi(pred) || block != get_nodes_block(pred) || is_NoMem(pred))
299                         continue;
300
301                 pred_node = get_ilpsched_irn(env, pred);
302                 pna       = get_ilpsched_node_attr(pred_node);
303                 idx       = get_irn_idx(irn);
304
305                 assert(pna->asap && "missing ASAP of predecessor");
306
307                 /*
308                         We have not already visited this predecessor
309                         -> accumulate it's predecessors
310                 */
311                 if (pna->visit_idx != idx) {
312                         pna->visit_idx = idx;
313                         na->transitive_block_nodes = bitset_or(na->transitive_block_nodes, pna->transitive_block_nodes);
314                         DBG((env->dbg, LEVEL_3, "\taccumulating preds of %+F\n", pred));
315                 }
316         }
317
318         /* every node is it's own transitive predecessor in block */
319         bitset_set(na->transitive_block_nodes, na->block_idx);
320
321         /* asap = number of transitive predecessors in this block */
322         na->asap = bitset_popcnt(na->transitive_block_nodes);
323
324         DBG((env->dbg, LEVEL_2, "\tcalculated ASAP is %u\n", na->asap));
325 }
326
327 /**
328  * Accumulate the successors of all nodes from irn on upwards.
329  */
330 static void accumulate_succs(be_ilpsched_env_t *env, ir_node *irn) {
331         unsigned             i, n;
332         be_ilpsched_irn_t    *node  = get_ilpsched_irn(env, irn);
333         ilpsched_node_attr_t *na    = get_ilpsched_node_attr(node);
334         ir_node              *block = get_nodes_block(irn);
335         waitq                *wq    = new_waitq();
336
337         DBG((env->dbg, LEVEL_3, "\taccumulating succs of %+F\n", irn));
338
339         /* enqueue node for final alap calculation */
340         if (! na->enqueued) {
341                 be_ilpsched_irn_t     *block_node = get_ilpsched_irn(env, block);
342                 ilpsched_block_attr_t *ba         = get_ilpsched_block_attr(block_node);
343
344                 na->enqueued = 1;
345                 na->alap     = ba->n_interesting_nodes;
346                 waitq_put(env->alap_queue, node);
347
348                 set_irn_link(irn, ba->head_ilp_nodes);
349                 ba->head_ilp_nodes = irn;
350                 DBG((env->dbg, LEVEL_5, "\t\tlinked %+F to ilp nodes of %+F, attr %p\n", irn, block, ba));
351                 DBG((env->dbg, LEVEL_4, "\t\tenqueueing %+F for final ALAP calculation\n", irn));
352         }
353
354         for (i = 0, n = get_irn_ins_or_deps(irn); i < n; ++i) {
355                 ir_node              *pred = skip_Proj(get_irn_in_or_dep(irn, i));
356                 unsigned             idx;
357                 be_ilpsched_irn_t    *pred_node;
358                 ilpsched_node_attr_t *pna;
359
360                 if (be_is_Keep(pred))
361                         pred = skip_Proj(get_irn_n(pred, 0));
362
363                 if (is_Phi(pred) || block != get_nodes_block(pred) || is_NoMem(pred))
364                         continue;
365
366                 pred_node = get_ilpsched_irn(env, pred);
367                 pna       = get_ilpsched_node_attr(pred_node);
368                 idx       = get_irn_idx(irn);
369
370                 /* accumulate the successors */
371                 if (pna->visit_idx != idx) {
372                         pna->visit_idx = idx;
373                         pna->transitive_block_nodes = bitset_or(pna->transitive_block_nodes, na->transitive_block_nodes);
374
375                         /* set current node as successor */
376                         bitset_set(pna->transitive_block_nodes, na->block_idx);
377                         waitq_put(wq, pred);
378
379                         DBG((env->dbg, LEVEL_3, "\taccumulating succs of %+F to %+F\n", irn, pred));
380                 }
381         }
382
383         /* process all predecessors */
384         while (! waitq_empty(wq)) {
385                 accumulate_succs(env, waitq_get(wq));
386         }
387
388         del_waitq(wq);
389 }
390
391 /**
392  * Calculate the ALAP scheduling step of all irns in current block.
393  * Depends on ASAP beeing calculated.
394  */
395 static void calculate_block_alap(ir_node *block, void *walk_env) {
396         be_ilpsched_env_t     *env        = walk_env;
397         be_ilpsched_irn_t     *block_node = get_ilpsched_irn(env, block);
398         ilpsched_block_attr_t *ba         = get_ilpsched_block_attr(block_node);
399
400         assert(is_Block(block));
401
402         DBG((env->dbg, LEVEL_2, "Calculating ALAP for nodes in %+F (%u nodes)\n", block, ba->n_interesting_nodes));
403
404         /* TODO: Might be faster to use out edges and call phase_reinit_single_irn_data */
405         phase_reinit_block_irn_data(&env->ph, block);
406
407         /* calculate the alap of all nodes, starting at collected roots upwards */
408         while (! waitq_empty(ba->root_nodes)) {
409                 accumulate_succs(env, waitq_get(ba->root_nodes));
410         }
411
412         /* we don't need it anymore */
413         del_waitq(ba->root_nodes);
414         ba->root_nodes = NULL;
415
416         /* all interesting nodes should have their successors accumulated now */
417         while (! waitq_empty(env->alap_queue)) {
418                 be_ilpsched_irn_t    *node = waitq_get(env->alap_queue);
419                 ilpsched_node_attr_t *na   = get_ilpsched_node_attr(node);
420
421                 na->alap -= bitset_popcnt(na->transitive_block_nodes);
422                 DBG((env->dbg, LEVEL_2, "\tALAP of %+F is %u (%u succs)\n", node->irn, na->alap, bitset_popcnt(na->transitive_block_nodes)));
423         }
424 }
425
426 /**
427  * Check if node can be executed on given unit type.
428  */
429 static INLINE int is_valid_unit_type_for_node(const be_execution_unit_type_t *tp, be_ilpsched_irn_t *node) {
430         int                  i;
431         ilpsched_node_attr_t *na = get_ilpsched_node_attr(node);
432
433         for (i = na->n_unit_types - 1; i >= 0; --i) {
434                 if (na->type_info[i].tp == tp)
435                         return i;
436         }
437
438         return -1;
439 }
440
441 /**
442  * Create the ilp (add variables, build constraints, solve, build schedule from solution).
443  */
444 static void create_ilp(ir_node *block, void *walk_env) {
445         be_ilpsched_env_t     *env           = walk_env;
446         be_ilpsched_irn_t     *block_node    = get_ilpsched_irn(env, block);
447         ilpsched_block_attr_t *ba            = get_ilpsched_block_attr(block_node);
448         unsigned              num_block_var  = 0;
449         unsigned              num_nodes      = 0;
450         unsigned              n_instr_max    = env->cpu->bundle_size * env->cpu->bundels_per_cycle;
451         bitset_t              *bs_block_irns = bitset_alloca(ba->block_last_idx);
452         FILE                  *logfile       = NULL;
453         unsigned              num_cst_assign, num_cst_prec, num_cst_resrc, num_cst_bundle;
454         unsigned              t;
455         int                   glob_type_idx;
456         ir_node               *irn;
457         char                  buf[1024];
458         lpp_t                 *lpp;
459         struct obstack        var_obst;
460         lc_timer_t            *t_var;
461         lc_timer_t            *t_cst_assign;
462         lc_timer_t            *t_cst_prec;
463         lc_timer_t            *t_cst_rsrc;
464         lc_timer_t            *t_cst_bundle;
465         double                fact_var, fact_cst;
466
467 #ifdef WITH_LIBCORE
468         t_var        = lc_timer_register("beilpsched_var",        "create ilp variables");
469         t_cst_assign = lc_timer_register("beilpsched_cst_assign", "create assignment constraints");
470         t_cst_prec   = lc_timer_register("beilpsched_cst_prec",   "create precedence constraints");
471         t_cst_rsrc   = lc_timer_register("beilpsched_cst_rsrc",   "create resource constraints");
472         t_cst_bundle = lc_timer_register("beilpsched_cst_bundle", "create bundle constraints");
473         LC_STOP_AND_RESET_TIMER(t_var);
474         LC_STOP_AND_RESET_TIMER(t_cst_assign);
475         LC_STOP_AND_RESET_TIMER(t_cst_prec);
476         LC_STOP_AND_RESET_TIMER(t_cst_rsrc);
477         LC_STOP_AND_RESET_TIMER(t_cst_bundle);
478 #endif /* WITH_LIBCORE */
479
480         DBG((env->dbg, 255, "\n\n\n=========================================\n"));
481         DBG((env->dbg, 255, "  ILP Scheduling for %+F\n", block));
482         DBG((env->dbg, 255, "=========================================\n\n"));
483
484         DBG((env->dbg, LEVEL_1, "Creating ILP Variables for nodes in %+F (%u interesting nodes)\n",
485                 block, ba->n_interesting_nodes));
486
487         if (ba->n_interesting_nodes < 2) {
488                 /* TODO schedule */
489                 return;
490         }
491
492         fact_var = ba->n_interesting_nodes < 25 ? 1.0 : 0.6;
493         fact_cst = ba->n_interesting_nodes < 25 ? 0.8 : 0.6;
494         lpp = new_lpp_userdef(
495                 "be ilp scheduling",
496                 lpp_minimize,
497                 (int)((double)(ba->n_interesting_nodes * ba->n_interesting_nodes) * fact_var) + 1,  /* num vars */
498                 (int)((double)(ba->n_interesting_nodes * ba->n_interesting_nodes) * fact_cst) + 20, /* num cst */
499                 1.2);                                                                               /* grow factor */
500         obstack_init(&var_obst);
501
502         lc_timer_push(t_var);
503         foreach_linked_irns(ba->head_ilp_nodes, irn) {
504                 const be_execution_unit_t ***execunits = arch_isa_get_allowed_execution_units(env->arch_env->isa, irn);
505                 be_ilpsched_irn_t         *node;
506                 ilpsched_node_attr_t      *na;
507                 unsigned                  n_unit_types, tp_idx, unit_idx, cur_var, n_var, cur_unit;
508
509                 /* count number of available unit types for this node */
510                 for (n_unit_types = 0; execunits[n_unit_types]; ++n_unit_types)
511                         /* just count */ ;
512
513                 node = get_ilpsched_irn(env, irn);
514                 na   = get_ilpsched_node_attr(node);
515
516                 na->n_unit_types = n_unit_types;
517                 na->type_info    = NEW_ARR_D(unit_type_info_t, &var_obst, n_unit_types);
518
519                 /* fill the type info array */
520                 for (tp_idx = 0; tp_idx < n_unit_types; ++tp_idx) {
521                         for (unit_idx = 0; execunits[tp_idx][unit_idx]; ++unit_idx)
522                                 /* just count units of this type */;
523
524                         na->type_info[tp_idx].tp      = execunits[tp_idx][0]->tp;
525                         na->type_info[tp_idx].n_units = unit_idx;
526                 }
527
528                 /* allocate space for ilp variables */
529                 na->ilp_vars = NEW_ARR_D(int, &var_obst, n_unit_types * VALID_SCHED_INTERVAL(na));
530                 memset(na->ilp_vars, -1, n_unit_types * VALID_SCHED_INTERVAL(na) * sizeof(na->ilp_vars[0]));
531
532                 DBG((env->dbg, LEVEL_3, "\thandling %+F (asap %u, alap %u, unit types %u):\n",
533                         irn, na->asap, na->alap, na->n_unit_types));
534
535                 cur_var = cur_unit = n_var = 0;
536                 /* create variables */
537                 for (tp_idx = 0; tp_idx < n_unit_types; ++tp_idx) {
538                         for (t = na->asap - 1; t <= na->alap - 1; ++t) {
539                                 snprintf(buf, sizeof(buf), "n%u_%s_%u",
540                                         get_irn_idx(irn), na->type_info[tp_idx].tp->name, t);
541                                 na->ilp_vars[cur_var++] = lpp_add_var(lpp, buf, lpp_binary, (double)(t + 1));
542                                 n_var++;
543                                 num_block_var++;
544                                 DBG((env->dbg, LEVEL_4, "\t\tcreated ILP variable %s\n", buf));
545                         }
546                 }
547
548                 DB((env->dbg, LEVEL_3, "%u variables created\n", n_var));
549                 num_nodes++;
550         }
551         lc_timer_pop();
552         DBG((env->dbg, LEVEL_1, "... %u variables for %u nodes created (%g sec)\n",
553                 num_block_var, num_nodes, lc_timer_elapsed_usec(t_var) / 1000000.0));
554
555         /*
556                 1st:
557                 - the assignment constraints:
558                         assure each node is executed once by exactly one (allowed) execution unit
559                 - the precedence constraints:
560                         assure that no data dependencies are violated
561         */
562         num_cst_assign = num_cst_prec = num_cst_resrc = num_cst_bundle = 0;
563         DBG((env->dbg, LEVEL_1, "Creating constraints for nodes in %+F:\n", block));
564         foreach_linked_irns(ba->head_ilp_nodes, irn) {
565                 int                  cst, tp_idx, i;
566                 unsigned             cur_var;
567                 be_ilpsched_irn_t    *node;
568                 ilpsched_node_attr_t *na;
569
570                 /* the assignment constraint */
571                 lc_timer_push(t_cst_assign);
572                 snprintf(buf, sizeof(buf), "assignment_cst_n%u", get_irn_idx(irn));
573                 cst = lpp_add_cst_uniq(lpp, buf, lpp_equal, 1.0);
574                 DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));
575                 num_cst_assign++;
576
577                 node    = get_ilpsched_irn(env, irn);
578                 na      = get_ilpsched_node_attr(node);
579                 cur_var = 0;
580
581                 lpp_set_factor_fast_bulk(lpp, cst, na->ilp_vars, na->n_unit_types * VALID_SCHED_INTERVAL(na), 1.0);
582                 lc_timer_pop();
583
584                 /* the precedence constraints */
585                 lc_timer_push(t_cst_prec);
586                 bs_block_irns = bitset_clear_all(bs_block_irns);
587                 for (i = get_irn_ins_or_deps(irn) - 1; i >= 0; --i) {
588                         ir_node              *pred = skip_Proj(get_irn_in_or_dep(irn, i));
589                         unsigned             t_low, t_high;
590                         be_ilpsched_irn_t    *pred_node;
591                         ilpsched_node_attr_t *pna;
592
593                         if (be_is_Keep(pred))
594                                 pred = skip_Proj(get_irn_n(pred, 0));
595
596                         if (is_Phi(pred) || block != get_nodes_block(pred) || is_NoMem(pred))
597                                 continue;
598
599                         pred_node = get_ilpsched_irn(env, pred);
600                         pna       = get_ilpsched_node_attr(pred_node);
601
602                         assert(pna->asap > 0 && pna->alap >= pna->asap && "Invalid scheduling interval.");
603
604                         if (! bitset_is_set(bs_block_irns, pna->block_idx))
605                                 bitset_set(bs_block_irns, pna->block_idx);
606                         else
607                                 continue;
608
609                         /* irn = n, pred = m */
610                         t_low  = MAX(na->asap, pna->asap);
611                         t_high = MIN(na->alap, pna->alap);
612                         for (t = t_low - 1; t <= t_high - 1; ++t) {
613                                 int tn, tm, cur_idx;
614                                 int int_na       = (t >= na->asap - 1) ? t - na->asap + 2 : 0;
615                                 int int_pna      = (t < pna->alap)     ? pna->alap - t    : 0;
616                                 int *tmp_var_idx = NEW_ARR_F(int, int_na * na->n_unit_types + int_pna * pna->n_unit_types);
617
618                                 snprintf(buf, sizeof(buf), "precedence_n%u_n%u_%u", get_irn_idx(pred), get_irn_idx(irn), t);
619                                 cst = lpp_add_cst_uniq(lpp, buf, lpp_less, 1.0);
620                                 DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));
621                                 num_cst_prec++;
622
623                                 cur_idx = 0;
624
625                                 /* lpp_set_factor_fast_bulk needs variables sorted ascending by index */
626                                 if (na->ilp_vars[0] < pna->ilp_vars[0]) {
627                                         for (tp_idx = na->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
628                                                 for (tn = na->asap - 1; tn <= t; ++tn) {
629                                                         unsigned idx = ILPVAR_IDX(na, tp_idx, tn);
630                                                         tmp_var_idx[cur_idx++] = na->ilp_vars[idx];
631                                                 }
632                                         }
633
634                                         for (tp_idx = pna->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
635                                                 for (tm = t; tm < pna->alap; ++tm) {
636                                                         unsigned idx = ILPVAR_IDX(pna, tp_idx, tm);
637                                                         tmp_var_idx[cur_idx++] = pna->ilp_vars[idx];
638                                                 }
639                                         }
640                                 }
641                                 else {
642                                         for (tp_idx = pna->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
643                                                 for (tm = t; tm < pna->alap; ++tm) {
644                                                         unsigned idx = ILPVAR_IDX(pna, tp_idx, tm);
645                                                         tmp_var_idx[cur_idx++] = pna->ilp_vars[idx];
646                                                 }
647                                         }
648
649                                         for (tp_idx = na->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
650                                                 for (tn = na->asap - 1; tn <= t; ++tn) {
651                                                         unsigned idx = ILPVAR_IDX(na, tp_idx, tn);
652                                                         tmp_var_idx[cur_idx++] = na->ilp_vars[idx];
653                                                 }
654                                         }
655                                 }
656
657                                 lpp_set_factor_fast_bulk(lpp, cst, tmp_var_idx, cur_idx, 1.0);
658
659                                 DEL_ARR_F(tmp_var_idx);
660                         }
661                 }
662                 lc_timer_pop();
663         }
664         DBG((env->dbg, LEVEL_1, "\t%u assignement constraints (%g sec)\n",
665                 num_cst_assign, lc_timer_elapsed_usec(t_cst_assign) / 1000000.0));
666         DBG((env->dbg, LEVEL_1, "\t%u precedence constraints (%g sec)\n",
667                 num_cst_prec, lc_timer_elapsed_usec(t_cst_prec) / 1000000.0));
668
669         /*
670                 2nd: the resource constraints:
671                 assure that for each time step not more instructions are scheduled
672                 to the same unit types as units of this type are available
673         */
674         lc_timer_push(t_cst_rsrc);
675         for (glob_type_idx = env->cpu->n_unit_types - 1; glob_type_idx >= 0; --glob_type_idx) {
676                 unsigned t;
677                 be_execution_unit_type_t *cur_tp = &env->cpu->unit_types[glob_type_idx];
678
679                 for (t = 0; t < ba->n_interesting_nodes; ++t) {
680                         int cst;
681                         int *tmp_var_idx = NEW_ARR_F(int, ba->n_interesting_nodes);
682                         int cur_idx      = 0;
683
684                         snprintf(buf, sizeof(buf), "resource_cst_%s_%u", cur_tp->name, t);
685                         cst = lpp_add_cst_uniq(lpp, buf, lpp_less, (double)cur_tp->n_units);
686                         DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));
687                         num_cst_resrc++;
688
689                         foreach_linked_irns(ba->head_ilp_nodes, irn) {
690                                 be_ilpsched_irn_t    *node = get_ilpsched_irn(env, irn);
691                                 ilpsched_node_attr_t *na   = get_ilpsched_node_attr(node);
692                                 int                  tp_idx;
693
694                                 tp_idx = is_valid_unit_type_for_node(cur_tp, node);
695
696                                 if (tp_idx >= 0 && t >= na->asap - 1 && t <= na->alap - 1) {
697                                         int cur_var = ILPVAR_IDX(na, tp_idx, t);
698                                         tmp_var_idx[cur_idx++] = na->ilp_vars[cur_var];
699                                 }
700                         }
701
702                         if (cur_idx)
703                                 lpp_set_factor_fast_bulk(lpp, cst, tmp_var_idx, cur_idx, 1.0);
704
705                         DEL_ARR_F(tmp_var_idx);
706                 }
707         }
708         lc_timer_pop();
709         DBG((env->dbg, LEVEL_1, "\t%u resource constraints (%g sec)\n",
710                 num_cst_resrc, lc_timer_elapsed_usec(t_cst_rsrc) / 1000000.0));
711
712         /*
713                 3rd: bundle constraints:
714                 assure, at most bundle_size * bundles_per_cycle instructions
715                 can be started at a certain point.
716         */
717         lc_timer_push(t_cst_bundle);
718         for (t = 0; t < ba->n_interesting_nodes; ++t) {
719                 int cst;
720                 int *tmp_var_idx = NEW_ARR_F(int, 0);
721
722                 snprintf(buf, sizeof(buf), "bundle_cst_%u", t);
723                 cst = lpp_add_cst_uniq(lpp, buf, lpp_less, (double)n_instr_max);
724                 DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));
725                 num_cst_bundle++;
726
727                 foreach_linked_irns(ba->head_ilp_nodes, irn) {
728                         be_ilpsched_irn_t    *node;
729                         ilpsched_node_attr_t *na;
730                         int                  tp_idx;
731
732                         node = get_ilpsched_irn(env, irn);
733                         na   = get_ilpsched_node_attr(node);
734
735                         if (t >= na->asap - 1 && t <= na->alap - 1) {
736                                 for (tp_idx = na->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
737                                         int idx = ILPVAR_IDX(na, tp_idx, t);
738                                         ARR_APP1(int, tmp_var_idx, na->ilp_vars[idx]);
739                                 }
740                         }
741                 }
742
743                 if (ARR_LEN(tmp_var_idx) > 0)
744                         lpp_set_factor_fast_bulk(lpp, cst, tmp_var_idx, ARR_LEN(tmp_var_idx), 1.0);
745
746                 DEL_ARR_F(tmp_var_idx);
747         }
748         lc_timer_pop();
749         DBG((env->dbg, LEVEL_1, "\t%u bundle constraints (%g sec)\n",
750                 num_cst_bundle, lc_timer_elapsed_usec(t_cst_bundle) / 1000000.0));
751
752         DBG((env->dbg, LEVEL_1, "ILP to solve: %u variables, %u constraints\n", lpp->var_next, lpp->cst_next));
753
754         DEBUG_ONLY(
755                 if (firm_dbg_get_mask(env->dbg) & 1) {
756                         FILE *f;
757
758                         snprintf(buf, sizeof(buf), "lpp_block_%lu.txt", get_irn_node_nr(block));
759                         f = fopen(buf, "w");
760                         lpp_dump_plain(lpp, f);
761                         fclose(f);
762                         snprintf(buf, sizeof(buf), "lpp_block_%lu.mps", get_irn_node_nr(block));
763                         lpp_dump(lpp, "buf");
764                 }
765         );
766
767         lpp_set_time_limit(lpp, env->opts->time_limit);
768
769         /* set logfile if requested */
770         if (strlen(env->opts->log_file) > 0) {
771                 if (strcasecmp(env->opts->log_file, "stdout") == 0)
772                         lpp_set_log(lpp, stdout);
773                 else if (strcasecmp(env->opts->log_file, "stderr") == 0)
774                         lpp_set_log(lpp, stderr);
775                 else {
776                         logfile = fopen(env->opts->log_file, "w");
777                         if (! logfile)
778                                 fprintf(stderr, "Could not open logfile '%s'! Logging disabled.\n", env->opts->log_file);
779                         else
780                                 lpp_set_log(lpp, logfile);
781                 }
782         }
783
784         lpp_solve_net(lpp, env->main_env->options->ilp_server, env->main_env->options->ilp_solver);
785
786         if (logfile)
787                 fclose(logfile);
788
789         if (! lpp_is_sol_valid(lpp)) {
790                 FILE *f;
791
792                 snprintf(buf, sizeof(buf), "lpp_block_%lu.assert.txt", get_irn_node_nr(block));
793                 f = fopen(buf, "w");
794                 lpp_dump_plain(lpp, f);
795                 fclose(f);
796                 snprintf(buf, sizeof(buf), "lpp_block_%lu.assert.mps", get_irn_node_nr(block));
797                 lpp_dump(lpp, buf);
798                 dump_ir_block_graph(env->irg, "-assert");
799
800                 assert(0 && "ILP solution is not feasible!");
801         }
802
803         DBG((env->dbg, LEVEL_1, "\nSolution:\n"));
804         DBG((env->dbg, LEVEL_1, "\tsend time: %g sec\n", lpp->send_time / 1000000.0));
805         DBG((env->dbg, LEVEL_1, "\treceive time: %g sec\n", lpp->recv_time / 1000000.0));
806         DBG((env->dbg, LEVEL_1, "\titerations: %d\n", lpp->iterations));
807         DBG((env->dbg, LEVEL_1, "\tsolution time: %g\n", lpp->sol_time));
808         DBG((env->dbg, LEVEL_1, "\tobjective function: %g\n", LPP_VALUE_IS_0(lpp->objval) ? 0.0 : lpp->objval));
809         DBG((env->dbg, LEVEL_1, "\tbest bound: %g\n", LPP_VALUE_IS_0(lpp->best_bound) ? 0.0 : lpp->best_bound));
810
811         /* apply solution */
812         foreach_linked_irns(ba->head_ilp_nodes, irn) {
813                 be_ilpsched_irn_t    *node;
814                 ilpsched_node_attr_t *na;
815                 int                  tp_idx;
816                 unsigned             cur_var;
817
818                 node    = get_ilpsched_irn(env, irn);
819                 na      = get_ilpsched_node_attr(node);
820                 cur_var = 0;
821
822                 for (tp_idx = na->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
823                         for (t = na->asap - 1; t <= na->alap - 1; ++t) {
824                                 double val = lpp_get_var_sol(lpp, na->ilp_vars[cur_var++]);
825                                 if (! LPP_VALUE_IS_0(val)) {
826                                         DBG((env->dbg, LEVEL_1, "Schedpoint of %+F is %u at unit type %s\n",
827                                                 irn, t, na->type_info[tp_idx].tp->name));
828                                 }
829                         }
830                 }
831         }
832
833         obstack_free(&var_obst, NULL);
834         free_lpp(lpp);
835 }
836
837 /**
838  * Perform ILP scheduling on the given irg.
839  */
840 void be_ilp_sched(const be_irg_t *birg) {
841         be_ilpsched_env_t env;
842         const char        *name = "be ilp scheduling";
843
844         FIRM_DBG_REGISTER(env.dbg, "firm.be.sched.ilp");
845
846         //firm_dbg_set_mask(env.dbg, 31);
847
848         env.irg        = birg->irg;
849         env.main_env   = birg->main_env;
850         env.alap_queue = new_waitq();
851         env.arch_env   = birg->main_env->arch_env;
852         env.cpu        = arch_isa_get_machine(birg->main_env->arch_env->isa);
853         env.opts       = &ilp_opts;
854         phase_init(&env.ph, name, env.irg, PHASE_DEFAULT_GROWTH, init_ilpsched_irn);
855
856         irg_walk_in_or_dep_graph(env.irg, NULL, build_block_idx, &env);
857
858         /*
859                 The block indicees are completely build after the walk,
860                 now we can allocate the bitsets (size depends on block indicees)
861                 for all nodes.
862         */
863         phase_reinit_irn_data(&env.ph);
864
865         /* Collect all root nodes (having no user in their block) and calculate ASAP. */
866         irg_walk_in_or_dep_blkwise_graph(env.irg, collect_alap_root_nodes, calculate_irn_asap, &env);
867
868         /* calculate ALAP and create variables */
869         irg_block_walk_graph(env.irg, calculate_block_alap, create_ilp, &env);
870
871         DEBUG_ONLY(
872                 if (firm_dbg_get_mask(env.dbg)) {
873                         phase_stat_t stat;
874                         phase_stat_t *stat_ptr = phase_stat(&env.ph, &stat);
875
876                         fprintf(stderr, "Phase used: %u bytes\n", stat_ptr->overall_bytes);
877                 }
878         );
879
880         /* free all allocated object */
881         del_waitq(env.alap_queue);
882         phase_free(&env.ph);
883 }
884
885 #ifdef WITH_LIBCORE
886 /**
887  * Register ILP scheduler options.
888  */
889 void ilpsched_register_options(lc_opt_entry_t *grp) {
890         static int     run_once = 0;
891         lc_opt_entry_t *sched_grp;
892
893         if (! run_once) {
894                 run_once  = 1;
895                 sched_grp = lc_opt_get_grp(grp, "ilpsched");
896
897                 lc_opt_add_table(sched_grp, ilpsched_option_table);
898         }
899 }
900 #endif /* WITH_LIBCORE */
901
902 #else /* WITH_ILP */
903
904 static int some_picky_compiler_do_not_allow_empty_files;
905
906 #endif /* WITH_ILP */