use only one variable for each unit type instead one foreach unit
[libfirm] / ir / be / beilpsched.c
1 /**
2  * Scheduling algorithms.
3  * An ILP scheduler based on
4  * "ILP-based Instruction Scheduling for IA-64"
5  * by Daniel Kaestner and Sebastian Winkel
6  *
7  * @date   22.10.2005
8  * @author Christian Wuerdig
9  * @cvs-id $Id$
10  */
11 #ifdef HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14
15 #ifdef WITH_ILP
16
17 #include <math.h>
18
19 #include "irnode_t.h"
20 #include "irgwalk.h"
21 #include "irbitset.h"
22 #include "irphase_t.h"
23 #include "iredges.h"
24 #include "debug.h"
25 #include "pdeq.h"
26 #include "irtools.h"
27 #include "irdump.h"
28
29 #include <lpp/lpp.h>
30 #include <lpp/lpp_net.h>
31
32 #ifdef WITH_LIBCORE
33 #include <libcore/lc_opts.h>
34 #include <libcore/lc_opts_enum.h>
35 #include <libcore/lc_timing.h>
36 #endif /* WITH_LIBCORE */
37
38 #include "be.h"
39 #include "benode_t.h"
40
41 typedef struct _unit_type_info_t {
42         int                            n_units;
43         const be_execution_unit_type_t *tp;
44 } unit_type_info_t;
45
46 /* attributes for a node */
47 typedef struct _ilpsched_node_attr_t {
48         unsigned asap;                     /**< The ASAP scheduling control step */
49         unsigned alap;                     /**< The ALAP scheduling control step */
50         unsigned sched_point;              /**< the step in which the node is finally scheduled */
51         unsigned visit_idx;                /**< Index of the node having visited this node last */
52         unsigned block_idx : 31;           /**< A unique per block index */
53         unsigned enqueued  : 1;            /**< Node is already enqueued for ALAP calculation */
54         bitset_t *transitive_block_nodes;  /**< Set of transitive block nodes (predecessors
55                                                                                         for ASAP, successors for ALAP */
56         unsigned n_unit_types;             /**< number of allowed execution unit types */
57         unit_type_info_t *type_info;       /**< list of allowed execution unit types */
58         int      *ilp_vars;                /**< the binary ilp variables x_{nt}^k assigned to this node
59                                                                                         (== 1 iff: node n is executed at step t on execunit k
60                                                                                         So we have: |ASAP(n) ... ALAP(n)| * |execunits| variables
61                                                                                 */
62 } ilpsched_node_attr_t;
63
64 /* attributes for a block */
65 typedef struct _ilpsched_block_attr_t {
66         unsigned block_last_idx;        /**< The highest node index in block so far */
67         unsigned n_interesting_nodes;   /**< The number of nodes interesting for scheduling */
68         waitq    *root_nodes;           /**< A queue of nodes having no user in current block */
69         ir_node  *head_ilp_nodes;       /**< A linked list of nodes which will contribute to ILP */
70 } ilpsched_block_attr_t;
71
72 typedef union _ilpsched_attr_ {
73         ilpsched_node_attr_t  node_attr;
74         ilpsched_block_attr_t block_attr;
75 } ilpsched_attr_t;
76
77 /* A irn for the phase and it's attributes (either node or block) */
78 typedef struct {
79         ir_node         *irn;
80         ilpsched_attr_t attr;
81 } be_ilpsched_irn_t;
82
83 typedef struct {
84         phase_t             ph;            /**< The phase */
85         ir_graph            *irg;          /**< The current irg */
86         waitq               *alap_queue;   /**< An queue of nodes waiting for final ALAP calculation */
87         const arch_env_t    *arch_env;
88         const be_main_env_t *main_env;
89         const be_machine_t  *cpu;          /**< the current abstract machine */
90         DEBUG_ONLY(firm_dbg_module_t *dbg);
91 } be_ilpsched_env_t;
92
93 #define get_ilpsched_irn(ilpsched_env, irn) (phase_get_or_set_irn_data(&(ilpsched_env)->ph, (irn)))
94 #define is_ilpsched_block(node)             (is_Block((node)->irn))
95 #define get_ilpsched_block_attr(block)      (&(block)->attr.block_attr)
96 #define get_ilpsched_node_attr(node)        (&(node)->attr.node_attr)
97
98 #define foreach_linked_irns(head, iter) for ((iter) = (head); (iter); (iter) = get_irn_link((iter)))
99
100 #define consider_for_sched(irn) \
101         (! (is_Block(irn) || is_Proj(irn) || is_Phi(irn) || be_is_Keep(irn) || is_NoMem(irn) || is_Jmp(irn)))
102
103 #define VALID_SCHED_INTERVAL(na) ((na)->alap - (na)->asap + 1)
104
105 #define ILPVAR_IDX(na, unit, control_step) \
106         ((unit) * VALID_SCHED_INTERVAL((na)) + (control_step) - (na)->asap + 1)
107
108 #define LPP_VALUE_IS_0(dbl) (fabs((dbl)) <= 1e-10)
109
110 static int cmp_ilp_var(const void *a, const void *b) {
111         int i = *(int *)a;
112         int j = *(int *)b;
113
114         return (i > j) - (i < j);
115 }
116
117 /**
118  * In case there is no phase information for irn, initialize it.
119  */
120 static void *init_ilpsched_irn(phase_t *ph, ir_node *irn, void *old) {
121         be_ilpsched_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));
122
123         if (res == old) {
124                 if (! is_Block(irn)) {
125                         ilpsched_node_attr_t *na = get_ilpsched_node_attr(res);
126
127                         if (! na->transitive_block_nodes) {
128                                 ir_node               *block      = get_nodes_block(irn);
129                                 be_ilpsched_irn_t     *block_node = phase_get_or_set_irn_data(ph, block);
130                                 ilpsched_block_attr_t *ba         = get_ilpsched_block_attr(block_node);
131
132                                 /* we are called after the block indicees have been build: create bitset */
133                                 na->transitive_block_nodes = bitset_obstack_alloc(phase_obst(ph), ba->block_last_idx);
134                         }
135                         else {
136                                 /* we are called from reinit block data: clear the bitset */
137                                 bitset_clear_all(na->transitive_block_nodes);
138                                 na->visit_idx = 0;
139                         }
140                 }
141                 return old;
142         }
143
144         res->irn = irn;
145
146         if (is_Block(irn)) {
147                 ilpsched_block_attr_t *ba = get_ilpsched_block_attr(res);
148
149                 ba->n_interesting_nodes = 0;
150                 ba->block_last_idx      = 0;
151                 ba->root_nodes          = new_waitq();
152                 ba->head_ilp_nodes      = NULL;
153         }
154         else {
155                 ilpsched_node_attr_t *na = get_ilpsched_node_attr(res);
156                 memset(na, 0, sizeof(*na));
157         }
158
159         return res;
160 }
161
162 /**
163  * Assign a per block unique number to each node.
164  */
165 static void build_block_idx(ir_node *irn, void *walk_env) {
166         be_ilpsched_env_t     *env = walk_env;
167         be_ilpsched_irn_t     *node, *block_node;
168         ilpsched_node_attr_t  *na;
169         ilpsched_block_attr_t *ba;
170
171         if (! consider_for_sched(irn))
172                 return;
173
174         node       = get_ilpsched_irn(env, irn);
175         na         = get_ilpsched_node_attr(node);
176         block_node = get_ilpsched_irn(env, get_nodes_block(irn));
177         ba         = get_ilpsched_block_attr(block_node);
178
179         na->block_idx = ba->block_last_idx++;
180 }
181
182 /**
183  * Add all nodes having no user in current block to last_nodes list.
184  */
185 static void collect_alap_root_nodes(ir_node *irn, void *walk_env) {
186         ir_node               *block;
187         const ir_edge_t       *edge;
188         be_ilpsched_irn_t     *block_node;
189         ilpsched_block_attr_t *ba;
190         be_ilpsched_env_t     *env           = walk_env;
191         int                   has_block_user = 0;
192         ir_edge_kind_t        ekind[2]       = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };
193         int                   i, j;
194
195         if (! consider_for_sched(irn))
196                 return;
197
198         block = get_nodes_block(irn);
199
200         DBG((env->dbg, LEVEL_3, "%+F (%+F) is interesting, examining ... ", irn, block));
201
202         for (i = 0; i < 2; ++i) {
203                 foreach_out_edge_kind(irn, edge, ekind[i]) {
204                         ir_node *user = get_edge_src_irn(edge);
205
206                         if (is_Proj(user)) {
207                                 const ir_edge_t *user_edge;
208
209                                 if (get_irn_mode(user) == mode_X)
210                                         continue;
211
212                                 /* The ABI ensures, that there will be no ProjT nodes in the graph. */
213                                 for (j = 0; j < 2; ++j) {
214                                         foreach_out_edge_kind(user, user_edge, ekind[j]) {
215                                                 ir_node *real_user = get_edge_src_irn(user_edge);
216
217                                                 if (! is_Phi(real_user) && ! be_is_Keep(real_user) && get_nodes_block(real_user) == block) {
218                                                         has_block_user = 1;
219                                                         break;
220                                                 }
221                                         }
222                                 }
223
224                                 if (has_block_user)
225                                         break;
226                         }
227                         else if (is_Block(user)) {
228                                 continue;
229                         }
230                         else if (! is_Phi(user) && ! be_is_Keep(user) && get_nodes_block(user) == block) {
231                                 has_block_user = 1;
232                                 break;
233                         }
234                 }
235         }
236
237         block_node = get_ilpsched_irn(env, block);
238         ba         = get_ilpsched_block_attr(block_node);
239
240         ba->n_interesting_nodes++;
241
242         /* current irn has no user inside this block, add to queue */
243         if (! has_block_user) {
244                 DB((env->dbg, LEVEL_3, "root node\n"));
245                 waitq_put(ba->root_nodes, irn);
246         }
247         else {
248                 DB((env->dbg, LEVEL_3, "normal node\n"));
249         }
250 }
251
252 /**
253  * Calculate the ASAP scheduling step for current irn.
254  */
255 static void calculate_irn_asap(ir_node *irn, void *walk_env) {
256         be_ilpsched_irn_t *node;
257         be_ilpsched_env_t *env = walk_env;
258         int      i;
259         ir_node  *block;
260         ilpsched_node_attr_t *na;
261
262         /* These nodes are handled separate */
263         if (! consider_for_sched(irn))
264                 return;
265
266         DBG((env->dbg, LEVEL_2, "Calculating ASAP of node %+F\n", irn));
267
268         node  = get_ilpsched_irn(env, irn);
269         block = get_nodes_block(irn);
270         na    = get_ilpsched_node_attr(node);
271
272         /* accumulate all transitive predecessors of current node */
273         for (i = get_irn_ins_or_deps(irn) - 1; i >= 0; --i) {
274                 ir_node              *pred = skip_Proj(get_irn_in_or_dep(irn, i));
275                 be_ilpsched_irn_t    *pred_node;
276                 ilpsched_node_attr_t *pna;
277                 unsigned             idx;
278
279                 if (be_is_Keep(pred))
280                         pred = skip_Proj(get_irn_n(pred, 0));
281
282                 if (is_Phi(pred) || block != get_nodes_block(pred) || is_NoMem(pred))
283                         continue;
284
285                 pred_node = get_ilpsched_irn(env, pred);
286                 pna       = get_ilpsched_node_attr(pred_node);
287                 idx       = get_irn_idx(irn);
288
289                 assert(pna->asap && "missing ASAP of predecessor");
290
291                 /*
292                         We have not already visited this predecessor
293                         -> accumulate it's predecessors
294                 */
295                 if (pna->visit_idx != idx) {
296                         pna->visit_idx = idx;
297                         na->transitive_block_nodes = bitset_or(na->transitive_block_nodes, pna->transitive_block_nodes);
298                         DBG((env->dbg, LEVEL_3, "\taccumulating preds of %+F\n", pred));
299                 }
300         }
301
302         /* every node is it's own transitive predecessor in block */
303         bitset_set(na->transitive_block_nodes, na->block_idx);
304
305         /* asap = number of transitive predecessors in this block */
306         na->asap = bitset_popcnt(na->transitive_block_nodes);
307
308         DBG((env->dbg, LEVEL_2, "\tcalculated ASAP is %u\n", na->asap));
309 }
310
311 /**
312  * Accumulate the successors of all nodes from irn on upwards.
313  */
314 static void accumulate_succs(be_ilpsched_env_t *env, ir_node *irn) {
315         unsigned             i, n;
316         be_ilpsched_irn_t    *node  = get_ilpsched_irn(env, irn);
317         ilpsched_node_attr_t *na    = get_ilpsched_node_attr(node);
318         ir_node              *block = get_nodes_block(irn);
319         waitq                *wq    = new_waitq();
320
321         DBG((env->dbg, LEVEL_3, "\taccumulating succs of %+F\n", irn));
322
323         /* enqueue node for final alap calculation */
324         if (! na->enqueued) {
325                 be_ilpsched_irn_t     *block_node = get_ilpsched_irn(env, block);
326                 ilpsched_block_attr_t *ba         = get_ilpsched_block_attr(block_node);
327
328                 na->enqueued = 1;
329                 na->alap     = ba->n_interesting_nodes;
330                 waitq_put(env->alap_queue, node);
331
332                 set_irn_link(irn, ba->head_ilp_nodes);
333                 ba->head_ilp_nodes = irn;
334                 DBG((env->dbg, LEVEL_5, "\t\tlinked %+F to ilp nodes of %+F, attr %p\n", irn, block, ba));
335                 DBG((env->dbg, LEVEL_4, "\t\tenqueueing %+F for final ALAP calculation\n", irn));
336         }
337
338         for (i = 0, n = get_irn_ins_or_deps(irn); i < n; ++i) {
339                 ir_node              *pred = skip_Proj(get_irn_in_or_dep(irn, i));
340                 unsigned             idx;
341                 be_ilpsched_irn_t    *pred_node;
342                 ilpsched_node_attr_t *pna;
343
344                 if (be_is_Keep(pred))
345                         pred = skip_Proj(get_irn_n(pred, 0));
346
347                 if (is_Phi(pred) || block != get_nodes_block(pred) || is_NoMem(pred))
348                         continue;
349
350                 pred_node = get_ilpsched_irn(env, pred);
351                 pna       = get_ilpsched_node_attr(pred_node);
352                 idx       = get_irn_idx(irn);
353
354                 /* accumulate the successors */
355                 if (pna->visit_idx != idx) {
356                         pna->visit_idx = idx;
357                         pna->transitive_block_nodes = bitset_or(pna->transitive_block_nodes, na->transitive_block_nodes);
358
359                         /* set current node as successor */
360                         bitset_set(pna->transitive_block_nodes, na->block_idx);
361                         waitq_put(wq, pred);
362
363                         DBG((env->dbg, LEVEL_3, "\taccumulating succs of %+F to %+F\n", irn, pred));
364                 }
365         }
366
367         /* process all predecessors */
368         while (! waitq_empty(wq)) {
369                 accumulate_succs(env, waitq_get(wq));
370         }
371
372         del_waitq(wq);
373 }
374
375 /**
376  * Calculate the ALAP scheduling step of all irns in current block.
377  * Depends on ASAP beeing calculated.
378  */
379 static void calculate_block_alap(ir_node *block, void *walk_env) {
380         be_ilpsched_env_t     *env        = walk_env;
381         be_ilpsched_irn_t     *block_node = get_ilpsched_irn(env, block);
382         ilpsched_block_attr_t *ba         = get_ilpsched_block_attr(block_node);
383
384         assert(is_Block(block));
385
386         DBG((env->dbg, LEVEL_2, "Calculating ALAP for nodes in %+F (%u nodes)\n", block, ba->n_interesting_nodes));
387
388         /* TODO: Might be faster to use out edges and call phase_reinit_single_irn_data */
389         phase_reinit_block_irn_data(&env->ph, block);
390
391         /* calculate the alap of all nodes, starting at collected roots upwards */
392         while (! waitq_empty(ba->root_nodes)) {
393                 accumulate_succs(env, waitq_get(ba->root_nodes));
394         }
395
396         /* we don't need it anymore */
397         del_waitq(ba->root_nodes);
398         ba->root_nodes = NULL;
399
400         /* all interesting nodes should have their successors accumulated now */
401         while (! waitq_empty(env->alap_queue)) {
402                 be_ilpsched_irn_t    *node = waitq_get(env->alap_queue);
403                 ilpsched_node_attr_t *na   = get_ilpsched_node_attr(node);
404
405                 na->alap -= bitset_popcnt(na->transitive_block_nodes);
406                 DBG((env->dbg, LEVEL_2, "\tALAP of %+F is %u (%u succs)\n", node->irn, na->alap, bitset_popcnt(na->transitive_block_nodes)));
407         }
408 }
409
410 /**
411  * Check if node can be executed on given unit type.
412  */
413 static INLINE int is_valid_unit_type_for_node(const be_execution_unit_type_t *tp, be_ilpsched_irn_t *node) {
414         int                  i;
415         ilpsched_node_attr_t *na = get_ilpsched_node_attr(node);
416
417         for (i = na->n_unit_types - 1; i >= 0; --i) {
418                 if (na->type_info[i].tp == tp)
419                         return i;
420         }
421
422         return -1;
423 }
424
425 /**
426  * Create the ilp (add variables, build constraints, solve, build schedule from solution).
427  */
428 static void create_ilp(ir_node *block, void *walk_env) {
429         be_ilpsched_env_t     *env           = walk_env;
430         be_ilpsched_irn_t     *block_node    = get_ilpsched_irn(env, block);
431         ilpsched_block_attr_t *ba            = get_ilpsched_block_attr(block_node);
432         unsigned              num_block_var  = 0;
433         unsigned              num_nodes      = 0;
434         unsigned              n_instr_max    = env->cpu->bundle_size * env->cpu->bundels_per_cycle;
435         bitset_t              *bs_block_irns = bitset_alloca(ba->block_last_idx);
436         unsigned              num_cst_assign, num_cst_prec, num_cst_resrc, num_cst_bundle;
437         unsigned              t;
438         int                   glob_type_idx;
439         ir_node               *irn;
440         char                  buf[1024];
441         lpp_t                 *lpp;
442         struct obstack        var_obst;
443         lc_timer_t            *t_var;
444         lc_timer_t            *t_cst_assign;
445         lc_timer_t            *t_cst_prec;
446         lc_timer_t            *t_cst_rsrc;
447         lc_timer_t            *t_cst_bundle;
448         double                fact_var, fact_cst;
449
450 #ifdef WITH_LIBCORE
451         t_var        = lc_timer_register("beilpsched_var",        "create ilp variables");
452         t_cst_assign = lc_timer_register("beilpsched_cst_assign", "create assignment constraints");
453         t_cst_prec   = lc_timer_register("beilpsched_cst_prec",   "create precedence constraints");
454         t_cst_rsrc   = lc_timer_register("beilpsched_cst_rsrc",   "create resource constraints");
455         t_cst_bundle = lc_timer_register("beilpsched_cst_bundle", "create bundle constraints");
456         LC_STOP_AND_RESET_TIMER(t_var);
457         LC_STOP_AND_RESET_TIMER(t_cst_assign);
458         LC_STOP_AND_RESET_TIMER(t_cst_prec);
459         LC_STOP_AND_RESET_TIMER(t_cst_rsrc);
460         LC_STOP_AND_RESET_TIMER(t_cst_bundle);
461 #endif /* WITH_LIBCORE */
462
463         DBG((env->dbg, 255, "\n\n\n=========================================\n"));
464         DBG((env->dbg, 255, "  ILP Scheduling for %+F\n", block));
465         DBG((env->dbg, 255, "=========================================\n\n"));
466
467         DBG((env->dbg, LEVEL_1, "Creating ILP Variables for nodes in %+F (%u interesting nodes)\n",
468                 block, ba->n_interesting_nodes));
469
470         if (ba->n_interesting_nodes < 2) {
471                 /* TODO schedule */
472                 return;
473         }
474
475         fact_var = ba->n_interesting_nodes < 25 ? 1.0 : 0.6;
476         fact_cst = ba->n_interesting_nodes < 25 ? 0.8 : 0.6;
477         lpp = new_lpp_userdef(
478                 "be ilp scheduling",
479                 lpp_minimize,
480                 (int)((double)(ba->n_interesting_nodes * ba->n_interesting_nodes) * fact_var) + 1,  /* num vars */
481                 (int)((double)(ba->n_interesting_nodes * ba->n_interesting_nodes) * fact_cst) + 20, /* num cst */
482                 1.2);                                                                               /* grow factor */
483         obstack_init(&var_obst);
484
485         lc_timer_push(t_var);
486         foreach_linked_irns(ba->head_ilp_nodes, irn) {
487                 const be_execution_unit_t ***execunits = arch_isa_get_allowed_execution_units(env->arch_env->isa, irn);
488                 be_ilpsched_irn_t         *node;
489                 ilpsched_node_attr_t      *na;
490                 unsigned                  n_unit_types, tp_idx, unit_idx, cur_var, n_var, cur_unit;
491
492                 /* count number of available unit typesfor this node */
493                 for (n_unit_types = 0; execunits[n_unit_types]; ++n_unit_types)
494                         /* just count */ ;
495
496                 node = get_ilpsched_irn(env, irn);
497                 na   = get_ilpsched_node_attr(node);
498
499                 na->n_unit_types = n_unit_types;
500                 na->type_info    = NEW_ARR_D(unit_type_info_t, &var_obst, n_unit_types);
501
502                 /* fill the type info array */
503                 for (tp_idx = 0; tp_idx < n_unit_types; ++tp_idx) {
504                         for (unit_idx = 0; execunits[tp_idx][unit_idx]; ++unit_idx)
505                                 /* just count units of this type */;
506
507                         na->type_info[tp_idx].tp      = execunits[tp_idx][0]->tp;
508                         na->type_info[tp_idx].n_units = unit_idx;
509                 }
510
511                 /* allocate space for ilp variables */
512                 na->ilp_vars = NEW_ARR_D(int, &var_obst, n_unit_types * VALID_SCHED_INTERVAL(na));
513                 memset(na->ilp_vars, -1, n_unit_types * VALID_SCHED_INTERVAL(na) * sizeof(na->ilp_vars[0]));
514
515                 DBG((env->dbg, LEVEL_3, "\thandling %+F (asap %u, alap %u, unit types %u):\n",
516                         irn, na->asap, na->alap, na->n_unit_types));
517
518                 cur_var = cur_unit = n_var = 0;
519                 /* create variables */
520                 for (tp_idx = 0; tp_idx < n_unit_types; ++tp_idx) {
521                         for (t = na->asap - 1; t <= na->alap - 1; ++t) {
522                                 snprintf(buf, sizeof(buf), "n%u_%s_%u",
523                                         get_irn_idx(irn), na->type_info[tp_idx].tp->name, t);
524                                 na->ilp_vars[cur_var++] = lpp_add_var(lpp, buf, lpp_binary, (double)(t + 1));
525                                 n_var++;
526                                 num_block_var++;
527                                 DBG((env->dbg, LEVEL_4, "\t\tcreated ILP variable %s\n", buf));
528                         }
529                 }
530
531                 DB((env->dbg, LEVEL_3, "%u variables created\n", n_var));
532                 num_nodes++;
533         }
534         lc_timer_pop();
535         DBG((env->dbg, LEVEL_1, "... %u variables for %u nodes created (%g sec)\n",
536                 num_block_var, num_nodes, lc_timer_elapsed_usec(t_var) / 1000000.0));
537
538         /*
539                 1st:
540                 - the assignment constraints:
541                         assure each node is executed once by exactly one (allowed) execution unit
542                 - the precedence constraints:
543                         assure that no data dependencies are violated
544         */
545         num_cst_assign = num_cst_prec = num_cst_resrc = num_cst_bundle = 0;
546         DBG((env->dbg, LEVEL_1, "Creating constraints for nodes in %+F:\n", block));
547         foreach_linked_irns(ba->head_ilp_nodes, irn) {
548                 int                  cst, tp_idx, i;
549                 unsigned             cur_var;
550                 be_ilpsched_irn_t    *node;
551                 ilpsched_node_attr_t *na;
552                 struct obstack       idx_obst;
553
554                 /* the assignment constraint */
555                 lc_timer_push(t_cst_assign);
556                 snprintf(buf, sizeof(buf), "assignment_cst_n%u", get_irn_idx(irn));
557                 cst = lpp_add_cst_uniq(lpp, buf, lpp_equal, 1.0);
558                 DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));
559                 num_cst_assign++;
560
561                 node    = get_ilpsched_irn(env, irn);
562                 na      = get_ilpsched_node_attr(node);
563                 cur_var = 0;
564
565                 lpp_set_factor_fast_bulk(lpp, cst, na->ilp_vars, na->n_unit_types * VALID_SCHED_INTERVAL(na), 1.0);
566                 lc_timer_pop();
567
568                 /* the precedence constraints */
569                 lc_timer_push(t_cst_prec);
570                 bs_block_irns = bitset_clear_all(bs_block_irns);
571                 for (i = get_irn_ins_or_deps(irn) - 1; i >= 0; --i) {
572                         ir_node              *pred = skip_Proj(get_irn_in_or_dep(irn, i));
573                         unsigned             t_low, t_high;
574                         be_ilpsched_irn_t    *pred_node;
575                         ilpsched_node_attr_t *pna;
576
577                         if (be_is_Keep(pred))
578                                 pred = skip_Proj(get_irn_n(pred, 0));
579
580                         if (is_Phi(pred) || block != get_nodes_block(pred) || is_NoMem(pred))
581                                 continue;
582
583                         pred_node = get_ilpsched_irn(env, pred);
584                         pna       = get_ilpsched_node_attr(pred_node);
585
586                         assert(pna->asap > 0 && pna->alap >= pna->asap && "Invalid scheduling interval.");
587
588                         if (! bitset_is_set(bs_block_irns, pna->block_idx))
589                                 bitset_set(bs_block_irns, pna->block_idx);
590                         else
591                                 continue;
592
593                         /* irn = n, pred = m */
594                         t_low  = MAX(na->asap, pna->asap);
595                         t_high = MIN(na->alap, pna->alap);
596                         for (t = t_low - 1; t <= t_high - 1; ++t) {
597                                 int tn, tm, cur_idx;
598                                 int int_na       = (t >= na->asap - 1) ? t - na->asap + 2 : 0;
599                                 int int_pna      = (t < pna->alap)     ? pna->alap - t    : 0;
600                                 int *tmp_var_idx = NEW_ARR_F(int, int_na * na->n_unit_types + int_pna * pna->n_unit_types);
601
602                                 snprintf(buf, sizeof(buf), "precedence_n%u_n%u_%u", get_irn_idx(pred), get_irn_idx(irn), t);
603                                 cst = lpp_add_cst_uniq(lpp, buf, lpp_less, 1.0);
604                                 DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));
605                                 num_cst_prec++;
606
607                                 cur_idx = 0;
608
609                                 /* lpp_set_factor_fast_bulk needs variables sorted ascending by index */
610                                 if (na->ilp_vars[0] < pna->ilp_vars[0]) {
611                                         for (tp_idx = na->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
612                                                 unsigned idx = ILPVAR_IDX(na, tp_idx, 0);
613                                                 for (tn = na->asap - 1; tn <= t; ++tn) {
614                                                         unsigned idx = ILPVAR_IDX(na, tp_idx, tn);
615                                                         tmp_var_idx[cur_idx++] = na->ilp_vars[idx];
616                                                 }
617                                         }
618
619                                         for (tp_idx = pna->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
620                                                 unsigned idx = ILPVAR_IDX(pna, tp_idx, 0);
621                                                 for (tm = t; tm < pna->alap; ++tm) {
622                                                         unsigned idx = ILPVAR_IDX(pna, tp_idx, tm);
623                                                         tmp_var_idx[cur_idx++] = pna->ilp_vars[idx];
624                                                 }
625                                         }
626                                 }
627                                 else {
628                                         for (tp_idx = pna->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
629                                                 unsigned idx = ILPVAR_IDX(pna, tp_idx, 0);
630                                                 for (tm = t; tm < pna->alap; ++tm) {
631                                                         unsigned idx = ILPVAR_IDX(pna, tp_idx, tm);
632                                                         tmp_var_idx[cur_idx++] = pna->ilp_vars[idx];
633                                                 }
634                                         }
635
636                                         for (tp_idx = na->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
637                                                 unsigned idx = ILPVAR_IDX(na, tp_idx, 0);
638                                                 for (tn = na->asap - 1; tn <= t; ++tn) {
639                                                         unsigned idx = ILPVAR_IDX(na, tp_idx, tn);
640                                                         tmp_var_idx[cur_idx++] = na->ilp_vars[idx];
641                                                 }
642                                         }
643                                 }
644
645                                 lpp_set_factor_fast_bulk(lpp, cst, tmp_var_idx, cur_idx, 1.0);
646
647                                 DEL_ARR_F(tmp_var_idx);
648                         }
649                 }
650                 lc_timer_pop();
651         }
652         DBG((env->dbg, LEVEL_1, "\t%u assignement constraints (%g sec)\n",
653                 num_cst_assign, lc_timer_elapsed_usec(t_cst_assign) / 1000000.0));
654         DBG((env->dbg, LEVEL_1, "\t%u precedence constraints (%g sec)\n",
655                 num_cst_prec, lc_timer_elapsed_usec(t_cst_prec) / 1000000.0));
656
657         /*
658                 2nd: the resource constraints:
659                 assure that for each time step not more instructions are scheduled
660                 to the same unit types as units of this type are available
661         */
662         lc_timer_push(t_cst_rsrc);
663         for (glob_type_idx = env->cpu->n_unit_types - 1; glob_type_idx >= 0; --glob_type_idx) {
664                 unsigned t;
665                 be_execution_unit_type_t *cur_tp = &env->cpu->unit_types[glob_type_idx];
666
667                 for (t = 0; t < ba->n_interesting_nodes; ++t) {
668                         int cst;
669                         int *tmp_var_idx = NEW_ARR_F(int, ba->n_interesting_nodes);
670                         int cur_idx      = 0;
671
672                         snprintf(buf, sizeof(buf), "resource_cst_%s_%u", cur_tp->name, t);
673                         cst = lpp_add_cst_uniq(lpp, buf, lpp_less, (double)cur_tp->n_units);
674                         DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));
675                         num_cst_resrc++;
676
677                         foreach_linked_irns(ba->head_ilp_nodes, irn) {
678                                 be_ilpsched_irn_t    *node = get_ilpsched_irn(env, irn);
679                                 ilpsched_node_attr_t *na   = get_ilpsched_node_attr(node);
680                                 int                  tp_idx;
681
682                                 tp_idx = is_valid_unit_type_for_node(cur_tp, node);
683
684                                 if (tp_idx >= 0 && t >= na->asap - 1 && t <= na->alap - 1) {
685                                         int cur_var = ILPVAR_IDX(na, tp_idx, t);
686                                         tmp_var_idx[cur_idx++] = na->ilp_vars[cur_var];
687                                 }
688                         }
689
690                         if (cur_idx)
691                                 lpp_set_factor_fast_bulk(lpp, cst, tmp_var_idx, cur_idx, 1.0);
692
693                         DEL_ARR_F(tmp_var_idx);
694                 }
695         }
696         lc_timer_pop();
697         DBG((env->dbg, LEVEL_1, "\t%u resource constraints (%g sec)\n",
698                 num_cst_resrc, lc_timer_elapsed_usec(t_cst_rsrc) / 1000000.0));
699
700         /*
701                 3rd: bundle constraints:
702                 assure, at most bundle_size * bundles_per_cycle instructions
703                 can be started at a certain point.
704         */
705         lc_timer_push(t_cst_bundle);
706         for (t = 0; t < ba->n_interesting_nodes; ++t) {
707                 int cst;
708                 int *tmp_var_idx = NEW_ARR_F(int, 0);
709
710                 snprintf(buf, sizeof(buf), "bundle_cst_%u", t);
711                 cst = lpp_add_cst_uniq(lpp, buf, lpp_less, (double)n_instr_max);
712                 DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));
713                 num_cst_bundle++;
714
715                 foreach_linked_irns(ba->head_ilp_nodes, irn) {
716                         be_ilpsched_irn_t    *node;
717                         ilpsched_node_attr_t *na;
718                         int                  tp_idx;
719
720                         node = get_ilpsched_irn(env, irn);
721                         na   = get_ilpsched_node_attr(node);
722
723                         if (t >= na->asap - 1 && t <= na->alap - 1) {
724                                 for (tp_idx = na->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
725                                         int idx = ILPVAR_IDX(na, tp_idx, t);
726                                         ARR_APP1(int, tmp_var_idx, na->ilp_vars[idx]);
727                                 }
728                         }
729                 }
730
731                 if (ARR_LEN(tmp_var_idx) > 0)
732                         lpp_set_factor_fast_bulk(lpp, cst, tmp_var_idx, ARR_LEN(tmp_var_idx), 1.0);
733
734                 DEL_ARR_F(tmp_var_idx);
735         }
736         lc_timer_pop();
737         DBG((env->dbg, LEVEL_1, "\t%u bundle constraints (%g sec)\n",
738                 num_cst_bundle, lc_timer_elapsed_usec(t_cst_bundle) / 1000000.0));
739
740         DBG((env->dbg, LEVEL_1, "ILP to solve: %u variables, %u constraints\n", lpp->var_next, lpp->cst_next));
741
742         DEBUG_ONLY(
743                 if (firm_dbg_get_mask(env->dbg) & 64) {
744                         FILE *f;
745
746                         snprintf(buf, sizeof(buf), "lpp_block_%lu.txt", get_irn_node_nr(block));
747                         f = fopen(buf, "w");
748                         lpp_dump_plain(lpp, f);
749                         fclose(f);
750                         snprintf(buf, sizeof(buf), "lpp_block_%lu.mps", get_irn_node_nr(block));
751                         lpp_dump(lpp, "buf");
752                 }
753         );
754
755         lpp_set_time_limit(lpp, 3600);
756         lpp_set_log(lpp, stdout);
757
758         lpp_solve_net(lpp, env->main_env->options->ilp_server, env->main_env->options->ilp_solver);
759
760         if (! lpp_is_sol_valid(lpp)) {
761                 FILE *f;
762
763                 snprintf(buf, sizeof(buf), "lpp_block_%lu.assert.txt", get_irn_node_nr(block));
764                 f = fopen(buf, "w");
765                 lpp_dump_plain(lpp, f);
766                 fclose(f);
767                 snprintf(buf, sizeof(buf), "lpp_block_%lu.assert.mps", get_irn_node_nr(block));
768                 lpp_dump(lpp, buf);
769                 dump_ir_block_graph(env->irg, "-assert");
770
771                 assert(0 && "ILP solution is not feasible!");
772         }
773
774         DBG((env->dbg, LEVEL_1, "\nSolution:\n"));
775         DBG((env->dbg, LEVEL_1, "\tsend time: %g sec\n", lpp->send_time / 1000000.0));
776         DBG((env->dbg, LEVEL_1, "\treceive time: %g sec\n", lpp->recv_time / 1000000.0));
777         DBG((env->dbg, LEVEL_1, "\titerations: %d\n", lpp->iterations));
778         DBG((env->dbg, LEVEL_1, "\tsolution time: %g\n", lpp->sol_time));
779         DBG((env->dbg, LEVEL_1, "\tobjective function: %g\n", LPP_VALUE_IS_0(lpp->objval) ? 0.0 : lpp->objval));
780         DBG((env->dbg, LEVEL_1, "\tbest bound: %g\n", LPP_VALUE_IS_0(lpp->best_bound) ? 0.0 : lpp->best_bound));
781
782         /* apply solution */
783         foreach_linked_irns(ba->head_ilp_nodes, irn) {
784                 be_ilpsched_irn_t    *node;
785                 ilpsched_node_attr_t *na;
786                 int                  tp_idx;
787                 unsigned             cur_var;
788
789                 node    = get_ilpsched_irn(env, irn);
790                 na      = get_ilpsched_node_attr(node);
791                 cur_var = 0;
792
793                 for (tp_idx = na->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
794                         for (t = na->asap - 1; t <= na->alap - 1; ++t) {
795                                 double val = lpp_get_var_sol(lpp, na->ilp_vars[cur_var++]);
796                                 if (! LPP_VALUE_IS_0(val)) {
797                                         DBG((env->dbg, LEVEL_1, "Schedpoint of %+F is %u at unit type %s\n",
798                                                 irn, t, na->type_info[tp_idx].tp->name));
799                                 }
800                         }
801                 }
802         }
803
804         obstack_free(&var_obst, NULL);
805         free_lpp(lpp);
806 }
807
808 /**
809  * Perform ILP scheduling on the given irg.
810  */
811 void be_ilp_sched(const be_irg_t *birg) {
812         be_ilpsched_env_t env;
813         const char        *name = "be ilp scheduling";
814
815         FIRM_DBG_REGISTER(env.dbg, "firm.be.sched.ilp");
816
817         //firm_dbg_set_mask(env.dbg, 31);
818
819         env.irg        = birg->irg;
820         env.main_env   = birg->main_env;
821         env.alap_queue = new_waitq();
822         env.arch_env   = birg->main_env->arch_env;
823         env.cpu        = arch_isa_get_machine(birg->main_env->arch_env->isa);
824         phase_init(&env.ph, name, env.irg, PHASE_DEFAULT_GROWTH, init_ilpsched_irn);
825
826         irg_walk_in_or_dep_graph(env.irg, NULL, build_block_idx, &env);
827
828         /*
829                 The block indicees are completely build after the walk,
830                 now we can allocate the bitsets (size depends on block indicees)
831                 for all nodes.
832         */
833         phase_reinit_irn_data(&env.ph);
834
835         /* Collect all root nodes (having no user in their block) and calculate ASAP. */
836         irg_walk_in_or_dep_blkwise_graph(env.irg, collect_alap_root_nodes, calculate_irn_asap, &env);
837
838         /* calculate ALAP and create variables */
839         irg_block_walk_graph(env.irg, calculate_block_alap, create_ilp, &env);
840
841         DEBUG_ONLY(
842                 if (firm_dbg_get_mask(env.dbg)) {
843                         phase_stat_t stat;
844                         phase_stat_t *stat_ptr = phase_stat(&env.ph, &stat);
845
846                         fprintf(stderr, "Phase used: %u bytes\n", stat_ptr->overall_bytes);
847                 }
848         );
849
850         /* free all allocated object */
851         del_waitq(env.alap_queue);
852         phase_free(&env.ph);
853 }
854
855 #else /* WITH_ILP */
856
857 static int some_picky_compiler_do_not_allow_empty_files;
858
859 #endif /* WITH_ILP */