2 * Scheduling algorithms.
3 * An ILP scheduler based on
4 * "ILP-based Instruction Scheduling for IA-64"
5 * by Daniel Kaestner and Sebastian Winkel
8 * @author Christian Wuerdig
26 #include "irphase_t.h"
34 #include <lpp/lpp_net.h>
37 #include <libcore/lc_opts.h>
38 #include <libcore/lc_opts_enum.h>
39 #include <libcore/lc_timing.h>
40 #endif /* WITH_LIBCORE */
45 typedef struct _ilpsched_options_t {
50 typedef struct _unit_type_info_t {
52 const be_execution_unit_type_t *tp;
/* attributes for a node */
typedef struct _ilpsched_node_attr_t {
	unsigned asap;                     /**< The ASAP scheduling control step */
	unsigned alap;                     /**< The ALAP scheduling control step */
	unsigned sched_point;              /**< the step in which the node is finally scheduled */
	unsigned visit_idx;                /**< Index of the node having visited this node last */
	unsigned block_idx : 31;           /**< A unique per block index */
	unsigned enqueued : 1;             /**< Node is already enqueued for ALAP calculation */
	bitset_t *transitive_block_nodes;  /**< Set of transitive block nodes (predecessors
	                                        for ASAP, successors for ALAP) */
	unsigned n_unit_types;             /**< number of allowed execution unit types */
	unit_type_info_t *type_info;       /**< list of allowed execution unit types */
	int *ilp_vars;                     /**< the binary ilp variables x_{nt}^k assigned to this node
	                                        (== 1 iff: node n is executed at step t on execunit k
	                                        So we have: |ASAP(n) ... ALAP(n)| * |execunits| variables) */
} ilpsched_node_attr_t;
/* attributes for a block */
typedef struct _ilpsched_block_attr_t {
	unsigned block_last_idx;       /**< The highest node index in block so far */
	unsigned n_interesting_nodes;  /**< The number of nodes interesting for scheduling */
	waitq *root_nodes;             /**< A queue of nodes having no user in current block
	                                    (consumed and freed in calculate_block_alap) */
	ir_node *head_ilp_nodes;       /**< A linked list (via irn links) of nodes which will contribute to ILP */
} ilpsched_block_attr_t;
81 typedef union _ilpsched_attr_ {
82 ilpsched_node_attr_t node_attr;
83 ilpsched_block_attr_t block_attr;
86 /* A irn for the phase and it's attributes (either node or block) */
93 phase_t ph; /**< The phase */
94 ir_graph *irg; /**< The current irg */
95 waitq *alap_queue; /**< An queue of nodes waiting for final ALAP calculation */
96 const arch_env_t *arch_env;
97 const be_main_env_t *main_env;
98 const be_machine_t *cpu; /**< the current abstract machine */
99 ilpsched_options_t *opts; /**< the ilp options for current irg */
100 DEBUG_ONLY(firm_dbg_module_t *dbg);
/* get (or lazily create) the scheduler phase data attached to an irn */
#define get_ilpsched_irn(ilpsched_env, irn) (phase_get_or_set_irn_data(&(ilpsched_env)->ph, (irn)))
/* does this phase node wrap a basic block? */
#define is_ilpsched_block(node) (is_Block((node)->irn))
/* accessors for the union'ed block/node attributes */
#define get_ilpsched_block_attr(block) (&(block)->attr.block_attr)
#define get_ilpsched_node_attr(node) (&(node)->attr.node_attr)

/* iterate over the intrusive list of ILP nodes built via irn links */
#define foreach_linked_irns(head, iter) for ((iter) = (head); (iter); (iter) = get_irn_link((iter)))

/* nodes of these kinds never get ILP variables */
#define consider_for_sched(irn) \
	(! (is_Block(irn) || is_Proj(irn) || is_Phi(irn) || be_is_Keep(irn) || is_NoMem(irn) || is_Jmp(irn)))

/* number of control steps in the closed interval [ASAP(n), ALAP(n)] */
#define VALID_SCHED_INTERVAL(na) ((na)->alap - (na)->asap + 1)

/* flat index of the ILP variable for (unit type, control step) of one node */
#define ILPVAR_IDX(na, unit, control_step) \
	((unit) * VALID_SCHED_INTERVAL((na)) + (control_step) - (na)->asap + 1)

/* treat solver results below this epsilon as zero */
#define LPP_VALUE_IS_0(dbl) (fabs((dbl)) <= 1e-10)
120 static ilpsched_options_t ilp_opts = {
121 120, /* 120 sec per block time limit */
126 static const lc_opt_table_entry_t ilpsched_option_table[] = {
127 LC_OPT_ENT_INT("time_limit", "ILP time limit per block", &ilp_opts.time_limit),
128 LC_OPT_ENT_STR("lpp_log", "LPP logfile (stderr and stdout are supported)", ilp_opts.log_file, sizeof(ilp_opts.log_file)),
131 #endif /* WITH_LIBCORE */
134 * In case there is no phase information for irn, initialize it.
static void *init_ilpsched_irn(phase_t *ph, ir_node *irn, void *old) {
	/* reuse existing phase data on reinit, otherwise allocate fresh data on the phase obstack */
	be_ilpsched_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));

	if (! is_Block(irn)) {
		ilpsched_node_attr_t *na = get_ilpsched_node_attr(res);

		if (! na->transitive_block_nodes) {
			ir_node *block = get_nodes_block(irn);
			be_ilpsched_irn_t *block_node = phase_get_or_set_irn_data(ph, block);
			ilpsched_block_attr_t *ba = get_ilpsched_block_attr(block_node);

			/* we are called after the block indices have been built: create bitset */
			na->transitive_block_nodes = bitset_obstack_alloc(phase_obst(ph), ba->block_last_idx);

			/* we are called from reinit block data: clear the bitset */
			bitset_clear_all(na->transitive_block_nodes);

		/* block attributes: reset counters and (re)create the root node queue */
		ilpsched_block_attr_t *ba = get_ilpsched_block_attr(res);

		ba->n_interesting_nodes = 0;
		ba->block_last_idx = 0;
		ba->root_nodes = new_waitq();
		ba->head_ilp_nodes = NULL;

		/* node attributes start out zeroed */
		ilpsched_node_attr_t *na = get_ilpsched_node_attr(res);
		memset(na, 0, sizeof(*na));
179 * Assign a per block unique number to each node.
/* Walker callback: assign a per-block unique number to each schedulable node. */
static void build_block_idx(ir_node *irn, void *walk_env) {
	be_ilpsched_env_t *env = walk_env;
	be_ilpsched_irn_t *node, *block_node;
	ilpsched_node_attr_t *na;
	ilpsched_block_attr_t *ba;

	/* skip nodes that never get ILP variables (blocks, Projs, Phis, Keeps, ...) */
	if (! consider_for_sched(irn))

	node = get_ilpsched_irn(env, irn);
	na = get_ilpsched_node_attr(node);
	block_node = get_ilpsched_irn(env, get_nodes_block(irn));
	ba = get_ilpsched_block_attr(block_node);

	/* hand out the next free index of this node's block */
	na->block_idx = ba->block_last_idx++;
* Add all nodes having no user in current block to the block's root_nodes queue.
/* Walker callback: count interesting nodes per block and collect ALAP root
   nodes, i.e. nodes without a user inside their own block. */
static void collect_alap_root_nodes(ir_node *irn, void *walk_env) {
	const ir_edge_t *edge;
	be_ilpsched_irn_t *block_node;
	ilpsched_block_attr_t *ba;
	be_ilpsched_env_t *env = walk_env;
	int has_block_user = 0;
	/* examine both normal and dependency out edges */
	ir_edge_kind_t ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };

	if (! consider_for_sched(irn))

	block = get_nodes_block(irn);

	DBG((env->dbg, LEVEL_3, "%+F (%+F) is interesting, examining ... ", irn, block));

	for (i = 0; i < 2; ++i) {
		foreach_out_edge_kind(irn, edge, ekind[i]) {
			ir_node *user = get_edge_src_irn(edge);

			const ir_edge_t *user_edge;

			/* mode_X users get special treatment below */
			if (get_irn_mode(user) == mode_X)

			/* The ABI ensures, that there will be no ProjT nodes in the graph. */
			for (j = 0; j < 2; ++j) {
				foreach_out_edge_kind(user, user_edge, ekind[j]) {
					ir_node *real_user = get_edge_src_irn(user_edge);

					/* a user through a Proj counts only if it sits in the same block */
					if (! is_Phi(real_user) && ! be_is_Keep(real_user) && get_nodes_block(real_user) == block) {

			else if (is_Block(user)) {

			/* direct users: Phis and Keeps do not count as block users */
			else if (! is_Phi(user) && ! be_is_Keep(user) && get_nodes_block(user) == block) {

	block_node = get_ilpsched_irn(env, block);
	ba = get_ilpsched_block_attr(block_node);

	ba->n_interesting_nodes++;

	/* current irn has no user inside this block, add to queue */
	if (! has_block_user) {
		DB((env->dbg, LEVEL_3, "root node\n"));
		waitq_put(ba->root_nodes, irn);

		DB((env->dbg, LEVEL_3, "normal node\n"));
269 * Calculate the ASAP scheduling step for current irn.
/* Walker callback: compute ASAP(irn) as the number of transitive
   same-block predecessors (including irn itself). */
static void calculate_irn_asap(ir_node *irn, void *walk_env) {
	be_ilpsched_irn_t *node;
	be_ilpsched_env_t *env = walk_env;
	ilpsched_node_attr_t *na;

	/* These nodes are handled separate */
	if (! consider_for_sched(irn))

	DBG((env->dbg, LEVEL_2, "Calculating ASAP of node %+F\n", irn));

	node = get_ilpsched_irn(env, irn);
	block = get_nodes_block(irn);
	na = get_ilpsched_node_attr(node);

	/* accumulate all transitive predecessors of current node */
	for (i = get_irn_ins_or_deps(irn) - 1; i >= 0; --i) {
		ir_node *pred = skip_Proj(get_irn_in_or_dep(irn, i));
		be_ilpsched_irn_t *pred_node;
		ilpsched_node_attr_t *pna;

		/* a Keep stands in for its (first) operand */
		if (be_is_Keep(pred))
			pred = skip_Proj(get_irn_n(pred, 0));

		/* only same-block, non-Phi, non-NoMem predecessors contribute */
		if (is_Phi(pred) || block != get_nodes_block(pred) || is_NoMem(pred))

		pred_node = get_ilpsched_irn(env, pred);
		pna = get_ilpsched_node_attr(pred_node);
		idx = get_irn_idx(irn);

		assert(pna->asap && "missing ASAP of predecessor");

		/*
		   We have not already visited this predecessor
		   -> accumulate its predecessors
		*/
		if (pna->visit_idx != idx) {
			pna->visit_idx = idx;
			na->transitive_block_nodes = bitset_or(na->transitive_block_nodes, pna->transitive_block_nodes);
			DBG((env->dbg, LEVEL_3, "\taccumulating preds of %+F\n", pred));

	/* every node is its own transitive predecessor in block */
	bitset_set(na->transitive_block_nodes, na->block_idx);

	/* asap = number of transitive predecessors in this block */
	na->asap = bitset_popcnt(na->transitive_block_nodes);

	DBG((env->dbg, LEVEL_2, "\tcalculated ASAP is %u\n", na->asap));
328 * Accumulate the successors of all nodes from irn on upwards.
/* Accumulate the successors of all nodes from irn on upwards:
   each predecessor's transitive_block_nodes gets irn's successors OR'ed in.
   Side effects: enqueues irn for the final ALAP step and links it into the
   block's head_ilp_nodes list (once, guarded by na->enqueued). */
static void accumulate_succs(be_ilpsched_env_t *env, ir_node *irn) {
	be_ilpsched_irn_t *node = get_ilpsched_irn(env, irn);
	ilpsched_node_attr_t *na = get_ilpsched_node_attr(node);
	ir_node *block = get_nodes_block(irn);
	waitq *wq = new_waitq();

	DBG((env->dbg, LEVEL_3, "\taccumulating succs of %+F\n", irn));

	/* enqueue node for final alap calculation */
	if (! na->enqueued) {
		be_ilpsched_irn_t *block_node = get_ilpsched_irn(env, block);
		ilpsched_block_attr_t *ba = get_ilpsched_block_attr(block_node);

		/* preliminary ALAP; the successor count is subtracted later in calculate_block_alap */
		na->alap = ba->n_interesting_nodes;
		waitq_put(env->alap_queue, node);

		/* prepend irn to the block's intrusive ILP node list */
		set_irn_link(irn, ba->head_ilp_nodes);
		ba->head_ilp_nodes = irn;
		DBG((env->dbg, LEVEL_5, "\t\tlinked %+F to ilp nodes of %+F, attr %p\n", irn, block, ba));
		DBG((env->dbg, LEVEL_4, "\t\tenqueueing %+F for final ALAP calculation\n", irn));

	for (i = 0, n = get_irn_ins_or_deps(irn); i < n; ++i) {
		ir_node *pred = skip_Proj(get_irn_in_or_dep(irn, i));

		be_ilpsched_irn_t *pred_node;
		ilpsched_node_attr_t *pna;

		/* a Keep stands in for its (first) operand */
		if (be_is_Keep(pred))
			pred = skip_Proj(get_irn_n(pred, 0));

		/* only same-block, non-Phi, non-NoMem predecessors are of interest */
		if (is_Phi(pred) || block != get_nodes_block(pred) || is_NoMem(pred))

		pred_node = get_ilpsched_irn(env, pred);
		pna = get_ilpsched_node_attr(pred_node);
		idx = get_irn_idx(irn);

		/* accumulate the successors */
		if (pna->visit_idx != idx) {
			pna->visit_idx = idx;
			pna->transitive_block_nodes = bitset_or(pna->transitive_block_nodes, na->transitive_block_nodes);

			/* set current node as successor */
			bitset_set(pna->transitive_block_nodes, na->block_idx);

			DBG((env->dbg, LEVEL_3, "\taccumulating succs of %+F to %+F\n", irn, pred));

	/* process all predecessors */
	while (! waitq_empty(wq)) {
		accumulate_succs(env, waitq_get(wq));
392 * Calculate the ALAP scheduling step of all irns in current block.
* Depends on ASAP being calculated.
/* Block walker: compute ALAP for every interesting node of the block.
   Requires that ASAP values and the root node queue have been computed. */
static void calculate_block_alap(ir_node *block, void *walk_env) {
	be_ilpsched_env_t *env = walk_env;
	be_ilpsched_irn_t *block_node = get_ilpsched_irn(env, block);
	ilpsched_block_attr_t *ba = get_ilpsched_block_attr(block_node);

	assert(is_Block(block));

	DBG((env->dbg, LEVEL_2, "Calculating ALAP for nodes in %+F (%u nodes)\n", block, ba->n_interesting_nodes));

	/* TODO: Might be faster to use out edges and call phase_reinit_single_irn_data */
	phase_reinit_block_irn_data(&env->ph, block);

	/* calculate the alap of all nodes, starting at collected roots upwards */
	while (! waitq_empty(ba->root_nodes)) {
		accumulate_succs(env, waitq_get(ba->root_nodes));

	/* we don't need it anymore */
	del_waitq(ba->root_nodes);
	ba->root_nodes = NULL;

	/* all interesting nodes should have their successors accumulated now */
	while (! waitq_empty(env->alap_queue)) {
		be_ilpsched_irn_t *node = waitq_get(env->alap_queue);
		ilpsched_node_attr_t *na = get_ilpsched_node_attr(node);

		/* ALAP = #interesting nodes in block - #transitive successors of node */
		na->alap -= bitset_popcnt(na->transitive_block_nodes);
		DBG((env->dbg, LEVEL_2, "\tALAP of %+F is %u (%u succs)\n", node->irn, na->alap, bitset_popcnt(na->transitive_block_nodes)));
427 * Check if node can be executed on given unit type.
/* Check if node can be executed on given unit type; callers treat a negative
   result as "not allowed" (see create_ilp). */
static INLINE int is_valid_unit_type_for_node(const be_execution_unit_type_t *tp, be_ilpsched_irn_t *node) {
	ilpsched_node_attr_t *na = get_ilpsched_node_attr(node);

	/* linear search over the node's allowed unit types */
	for (i = na->n_unit_types - 1; i >= 0; --i) {
		if (na->type_info[i].tp == tp)
442 * Create the ilp (add variables, build constraints, solve, build schedule from solution).
444 static void create_ilp(ir_node *block, void *walk_env) {
445 be_ilpsched_env_t *env = walk_env;
446 be_ilpsched_irn_t *block_node = get_ilpsched_irn(env, block);
447 ilpsched_block_attr_t *ba = get_ilpsched_block_attr(block_node);
448 unsigned num_block_var = 0;
449 unsigned num_nodes = 0;
450 unsigned n_instr_max = env->cpu->bundle_size * env->cpu->bundels_per_cycle;
451 bitset_t *bs_block_irns = bitset_alloca(ba->block_last_idx);
452 FILE *logfile = NULL;
453 unsigned num_cst_assign, num_cst_prec, num_cst_resrc, num_cst_bundle;
459 struct obstack var_obst;
461 lc_timer_t *t_cst_assign;
462 lc_timer_t *t_cst_prec;
463 lc_timer_t *t_cst_rsrc;
464 lc_timer_t *t_cst_bundle;
465 double fact_var, fact_cst;
468 t_var = lc_timer_register("beilpsched_var", "create ilp variables");
469 t_cst_assign = lc_timer_register("beilpsched_cst_assign", "create assignment constraints");
470 t_cst_prec = lc_timer_register("beilpsched_cst_prec", "create precedence constraints");
471 t_cst_rsrc = lc_timer_register("beilpsched_cst_rsrc", "create resource constraints");
472 t_cst_bundle = lc_timer_register("beilpsched_cst_bundle", "create bundle constraints");
473 LC_STOP_AND_RESET_TIMER(t_var);
474 LC_STOP_AND_RESET_TIMER(t_cst_assign);
475 LC_STOP_AND_RESET_TIMER(t_cst_prec);
476 LC_STOP_AND_RESET_TIMER(t_cst_rsrc);
477 LC_STOP_AND_RESET_TIMER(t_cst_bundle);
478 #endif /* WITH_LIBCORE */
480 DBG((env->dbg, 255, "\n\n\n=========================================\n"));
481 DBG((env->dbg, 255, " ILP Scheduling for %+F\n", block));
482 DBG((env->dbg, 255, "=========================================\n\n"));
484 DBG((env->dbg, LEVEL_1, "Creating ILP Variables for nodes in %+F (%u interesting nodes)\n",
485 block, ba->n_interesting_nodes));
487 if (ba->n_interesting_nodes < 2) {
492 fact_var = ba->n_interesting_nodes < 25 ? 1.0 : 0.6;
493 fact_cst = ba->n_interesting_nodes < 25 ? 0.8 : 0.6;
494 lpp = new_lpp_userdef(
497 (int)((double)(ba->n_interesting_nodes * ba->n_interesting_nodes) * fact_var) + 1, /* num vars */
498 (int)((double)(ba->n_interesting_nodes * ba->n_interesting_nodes) * fact_cst) + 20, /* num cst */
499 1.2); /* grow factor */
500 obstack_init(&var_obst);
502 lc_timer_push(t_var);
503 foreach_linked_irns(ba->head_ilp_nodes, irn) {
504 const be_execution_unit_t ***execunits = arch_isa_get_allowed_execution_units(env->arch_env->isa, irn);
505 be_ilpsched_irn_t *node;
506 ilpsched_node_attr_t *na;
507 unsigned n_unit_types, tp_idx, unit_idx, cur_var, n_var, cur_unit;
509 /* count number of available unit types for this node */
510 for (n_unit_types = 0; execunits[n_unit_types]; ++n_unit_types)
513 node = get_ilpsched_irn(env, irn);
514 na = get_ilpsched_node_attr(node);
516 na->n_unit_types = n_unit_types;
517 na->type_info = NEW_ARR_D(unit_type_info_t, &var_obst, n_unit_types);
519 /* fill the type info array */
520 for (tp_idx = 0; tp_idx < n_unit_types; ++tp_idx) {
521 for (unit_idx = 0; execunits[tp_idx][unit_idx]; ++unit_idx)
522 /* just count units of this type */;
524 na->type_info[tp_idx].tp = execunits[tp_idx][0]->tp;
525 na->type_info[tp_idx].n_units = unit_idx;
528 /* allocate space for ilp variables */
529 na->ilp_vars = NEW_ARR_D(int, &var_obst, n_unit_types * VALID_SCHED_INTERVAL(na));
530 memset(na->ilp_vars, -1, n_unit_types * VALID_SCHED_INTERVAL(na) * sizeof(na->ilp_vars[0]));
532 DBG((env->dbg, LEVEL_3, "\thandling %+F (asap %u, alap %u, unit types %u):\n",
533 irn, na->asap, na->alap, na->n_unit_types));
535 cur_var = cur_unit = n_var = 0;
536 /* create variables */
537 for (tp_idx = 0; tp_idx < n_unit_types; ++tp_idx) {
538 for (t = na->asap - 1; t <= na->alap - 1; ++t) {
539 snprintf(buf, sizeof(buf), "n%u_%s_%u",
540 get_irn_idx(irn), na->type_info[tp_idx].tp->name, t);
541 na->ilp_vars[cur_var++] = lpp_add_var(lpp, buf, lpp_binary, (double)(t + 1));
544 DBG((env->dbg, LEVEL_4, "\t\tcreated ILP variable %s\n", buf));
548 DB((env->dbg, LEVEL_3, "%u variables created\n", n_var));
552 DBG((env->dbg, LEVEL_1, "... %u variables for %u nodes created (%g sec)\n",
553 num_block_var, num_nodes, lc_timer_elapsed_usec(t_var) / 1000000.0));
557 - the assignment constraints:
558 assure each node is executed once by exactly one (allowed) execution unit
559 - the precedence constraints:
560 assure that no data dependencies are violated
562 num_cst_assign = num_cst_prec = num_cst_resrc = num_cst_bundle = 0;
563 DBG((env->dbg, LEVEL_1, "Creating constraints for nodes in %+F:\n", block));
564 foreach_linked_irns(ba->head_ilp_nodes, irn) {
567 be_ilpsched_irn_t *node;
568 ilpsched_node_attr_t *na;
570 /* the assignment constraint */
571 lc_timer_push(t_cst_assign);
572 snprintf(buf, sizeof(buf), "assignment_cst_n%u", get_irn_idx(irn));
573 cst = lpp_add_cst_uniq(lpp, buf, lpp_equal, 1.0);
574 DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));
577 node = get_ilpsched_irn(env, irn);
578 na = get_ilpsched_node_attr(node);
581 lpp_set_factor_fast_bulk(lpp, cst, na->ilp_vars, na->n_unit_types * VALID_SCHED_INTERVAL(na), 1.0);
584 /* the precedence constraints */
585 lc_timer_push(t_cst_prec);
586 bs_block_irns = bitset_clear_all(bs_block_irns);
587 for (i = get_irn_ins_or_deps(irn) - 1; i >= 0; --i) {
588 ir_node *pred = skip_Proj(get_irn_in_or_dep(irn, i));
589 unsigned t_low, t_high;
590 be_ilpsched_irn_t *pred_node;
591 ilpsched_node_attr_t *pna;
593 if (be_is_Keep(pred))
594 pred = skip_Proj(get_irn_n(pred, 0));
596 if (is_Phi(pred) || block != get_nodes_block(pred) || is_NoMem(pred))
599 pred_node = get_ilpsched_irn(env, pred);
600 pna = get_ilpsched_node_attr(pred_node);
602 assert(pna->asap > 0 && pna->alap >= pna->asap && "Invalid scheduling interval.");
604 if (! bitset_is_set(bs_block_irns, pna->block_idx))
605 bitset_set(bs_block_irns, pna->block_idx);
609 /* irn = n, pred = m */
610 t_low = MAX(na->asap, pna->asap);
611 t_high = MIN(na->alap, pna->alap);
612 for (t = t_low - 1; t <= t_high - 1; ++t) {
614 int int_na = (t >= na->asap - 1) ? t - na->asap + 2 : 0;
615 int int_pna = (t < pna->alap) ? pna->alap - t : 0;
616 int *tmp_var_idx = NEW_ARR_F(int, int_na * na->n_unit_types + int_pna * pna->n_unit_types);
618 snprintf(buf, sizeof(buf), "precedence_n%u_n%u_%u", get_irn_idx(pred), get_irn_idx(irn), t);
619 cst = lpp_add_cst_uniq(lpp, buf, lpp_less, 1.0);
620 DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));
625 /* lpp_set_factor_fast_bulk needs variables sorted ascending by index */
626 if (na->ilp_vars[0] < pna->ilp_vars[0]) {
627 for (tp_idx = na->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
628 for (tn = na->asap - 1; tn <= t; ++tn) {
629 unsigned idx = ILPVAR_IDX(na, tp_idx, tn);
630 tmp_var_idx[cur_idx++] = na->ilp_vars[idx];
634 for (tp_idx = pna->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
635 for (tm = t; tm < pna->alap; ++tm) {
636 unsigned idx = ILPVAR_IDX(pna, tp_idx, tm);
637 tmp_var_idx[cur_idx++] = pna->ilp_vars[idx];
642 for (tp_idx = pna->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
643 for (tm = t; tm < pna->alap; ++tm) {
644 unsigned idx = ILPVAR_IDX(pna, tp_idx, tm);
645 tmp_var_idx[cur_idx++] = pna->ilp_vars[idx];
649 for (tp_idx = na->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
650 for (tn = na->asap - 1; tn <= t; ++tn) {
651 unsigned idx = ILPVAR_IDX(na, tp_idx, tn);
652 tmp_var_idx[cur_idx++] = na->ilp_vars[idx];
657 lpp_set_factor_fast_bulk(lpp, cst, tmp_var_idx, cur_idx, 1.0);
659 DEL_ARR_F(tmp_var_idx);
664 DBG((env->dbg, LEVEL_1, "\t%u assignement constraints (%g sec)\n",
665 num_cst_assign, lc_timer_elapsed_usec(t_cst_assign) / 1000000.0));
666 DBG((env->dbg, LEVEL_1, "\t%u precedence constraints (%g sec)\n",
667 num_cst_prec, lc_timer_elapsed_usec(t_cst_prec) / 1000000.0));
670 2nd: the resource constraints:
671 assure that for each time step not more instructions are scheduled
672 to the same unit types as units of this type are available
674 lc_timer_push(t_cst_rsrc);
675 for (glob_type_idx = env->cpu->n_unit_types - 1; glob_type_idx >= 0; --glob_type_idx) {
677 be_execution_unit_type_t *cur_tp = &env->cpu->unit_types[glob_type_idx];
679 for (t = 0; t < ba->n_interesting_nodes; ++t) {
681 int *tmp_var_idx = NEW_ARR_F(int, ba->n_interesting_nodes);
684 snprintf(buf, sizeof(buf), "resource_cst_%s_%u", cur_tp->name, t);
685 cst = lpp_add_cst_uniq(lpp, buf, lpp_less, (double)cur_tp->n_units);
686 DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));
689 foreach_linked_irns(ba->head_ilp_nodes, irn) {
690 be_ilpsched_irn_t *node = get_ilpsched_irn(env, irn);
691 ilpsched_node_attr_t *na = get_ilpsched_node_attr(node);
694 tp_idx = is_valid_unit_type_for_node(cur_tp, node);
696 if (tp_idx >= 0 && t >= na->asap - 1 && t <= na->alap - 1) {
697 int cur_var = ILPVAR_IDX(na, tp_idx, t);
698 tmp_var_idx[cur_idx++] = na->ilp_vars[cur_var];
703 lpp_set_factor_fast_bulk(lpp, cst, tmp_var_idx, cur_idx, 1.0);
705 DEL_ARR_F(tmp_var_idx);
709 DBG((env->dbg, LEVEL_1, "\t%u resource constraints (%g sec)\n",
710 num_cst_resrc, lc_timer_elapsed_usec(t_cst_rsrc) / 1000000.0));
713 3rd: bundle constraints:
714 assure, at most bundle_size * bundles_per_cycle instructions
715 can be started at a certain point.
717 lc_timer_push(t_cst_bundle);
718 for (t = 0; t < ba->n_interesting_nodes; ++t) {
720 int *tmp_var_idx = NEW_ARR_F(int, 0);
722 snprintf(buf, sizeof(buf), "bundle_cst_%u", t);
723 cst = lpp_add_cst_uniq(lpp, buf, lpp_less, (double)n_instr_max);
724 DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));
727 foreach_linked_irns(ba->head_ilp_nodes, irn) {
728 be_ilpsched_irn_t *node;
729 ilpsched_node_attr_t *na;
732 node = get_ilpsched_irn(env, irn);
733 na = get_ilpsched_node_attr(node);
735 if (t >= na->asap - 1 && t <= na->alap - 1) {
736 for (tp_idx = na->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
737 int idx = ILPVAR_IDX(na, tp_idx, t);
738 ARR_APP1(int, tmp_var_idx, na->ilp_vars[idx]);
743 if (ARR_LEN(tmp_var_idx) > 0)
744 lpp_set_factor_fast_bulk(lpp, cst, tmp_var_idx, ARR_LEN(tmp_var_idx), 1.0);
746 DEL_ARR_F(tmp_var_idx);
749 DBG((env->dbg, LEVEL_1, "\t%u bundle constraints (%g sec)\n",
750 num_cst_bundle, lc_timer_elapsed_usec(t_cst_bundle) / 1000000.0));
752 DBG((env->dbg, LEVEL_1, "ILP to solve: %u variables, %u constraints\n", lpp->var_next, lpp->cst_next));
755 if (firm_dbg_get_mask(env->dbg) & 1) {
758 snprintf(buf, sizeof(buf), "lpp_block_%lu.txt", get_irn_node_nr(block));
760 lpp_dump_plain(lpp, f);
762 snprintf(buf, sizeof(buf), "lpp_block_%lu.mps", get_irn_node_nr(block));
763 lpp_dump(lpp, "buf");
767 lpp_set_time_limit(lpp, env->opts->time_limit);
769 /* set logfile if requested */
770 if (strlen(env->opts->log_file) > 0) {
771 if (strcasecmp(env->opts->log_file, "stdout") == 0)
772 lpp_set_log(lpp, stdout);
773 else if (strcasecmp(env->opts->log_file, "stderr") == 0)
774 lpp_set_log(lpp, stderr);
776 logfile = fopen(env->opts->log_file, "w");
778 fprintf(stderr, "Could not open logfile '%s'! Logging disabled.\n", env->opts->log_file);
780 lpp_set_log(lpp, logfile);
784 lpp_solve_net(lpp, env->main_env->options->ilp_server, env->main_env->options->ilp_solver);
789 if (! lpp_is_sol_valid(lpp)) {
792 snprintf(buf, sizeof(buf), "lpp_block_%lu.assert.txt", get_irn_node_nr(block));
794 lpp_dump_plain(lpp, f);
796 snprintf(buf, sizeof(buf), "lpp_block_%lu.assert.mps", get_irn_node_nr(block));
798 dump_ir_block_graph(env->irg, "-assert");
800 assert(0 && "ILP solution is not feasible!");
803 DBG((env->dbg, LEVEL_1, "\nSolution:\n"));
804 DBG((env->dbg, LEVEL_1, "\tsend time: %g sec\n", lpp->send_time / 1000000.0));
805 DBG((env->dbg, LEVEL_1, "\treceive time: %g sec\n", lpp->recv_time / 1000000.0));
806 DBG((env->dbg, LEVEL_1, "\titerations: %d\n", lpp->iterations));
807 DBG((env->dbg, LEVEL_1, "\tsolution time: %g\n", lpp->sol_time));
808 DBG((env->dbg, LEVEL_1, "\tobjective function: %g\n", LPP_VALUE_IS_0(lpp->objval) ? 0.0 : lpp->objval));
809 DBG((env->dbg, LEVEL_1, "\tbest bound: %g\n", LPP_VALUE_IS_0(lpp->best_bound) ? 0.0 : lpp->best_bound));
812 foreach_linked_irns(ba->head_ilp_nodes, irn) {
813 be_ilpsched_irn_t *node;
814 ilpsched_node_attr_t *na;
818 node = get_ilpsched_irn(env, irn);
819 na = get_ilpsched_node_attr(node);
822 for (tp_idx = na->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
823 for (t = na->asap - 1; t <= na->alap - 1; ++t) {
824 double val = lpp_get_var_sol(lpp, na->ilp_vars[cur_var++]);
825 if (! LPP_VALUE_IS_0(val)) {
826 DBG((env->dbg, LEVEL_1, "Schedpoint of %+F is %u at unit type %s\n",
827 irn, t, na->type_info[tp_idx].tp->name));
833 obstack_free(&var_obst, NULL);
838 * Perform ILP scheduling on the given irg.
/* Perform ILP scheduling on the given irg: set up the phase, compute
   block indices, ASAP/ALAP intervals, then build and solve one ILP per block. */
void be_ilp_sched(const be_irg_t *birg) {
	be_ilpsched_env_t env;
	const char *name = "be ilp scheduling";

	FIRM_DBG_REGISTER(env.dbg, "firm.be.sched.ilp");

	//firm_dbg_set_mask(env.dbg, 31);

	env.main_env = birg->main_env;
	env.alap_queue = new_waitq();
	env.arch_env = birg->main_env->arch_env;
	env.cpu = arch_isa_get_machine(birg->main_env->arch_env->isa);
	env.opts = &ilp_opts;
	phase_init(&env.ph, name, env.irg, PHASE_DEFAULT_GROWTH, init_ilpsched_irn);

	/* assign a per-block index to every schedulable node */
	irg_walk_in_or_dep_graph(env.irg, NULL, build_block_idx, &env);

	/*
	   The block indices are completely built after the walk,
	   now we can allocate the bitsets (size depends on block indices)
	*/
	phase_reinit_irn_data(&env.ph);

	/* Collect all root nodes (having no user in their block) and calculate ASAP. */
	irg_walk_in_or_dep_blkwise_graph(env.irg, collect_alap_root_nodes, calculate_irn_asap, &env);

	/* calculate ALAP and create variables */
	irg_block_walk_graph(env.irg, calculate_block_alap, create_ilp, &env);

	/* report phase memory usage when any debug level is enabled */
	if (firm_dbg_get_mask(env.dbg)) {
		phase_stat_t *stat_ptr = phase_stat(&env.ph, &stat);

		fprintf(stderr, "Phase used: %u bytes\n", stat_ptr->overall_bytes);

	/* free all allocated object */
	del_waitq(env.alap_queue);
887 * Register ILP scheduler options.
/* Register ILP scheduler options under the "ilpsched" option group. */
void ilpsched_register_options(lc_opt_entry_t *grp) {
	/* guard so the option table is only registered once */
	static int run_once = 0;
	lc_opt_entry_t *sched_grp;

	sched_grp = lc_opt_get_grp(grp, "ilpsched");

	lc_opt_add_table(sched_grp, ilpsched_option_table);
900 #endif /* WITH_LIBCORE */
904 static int some_picky_compiler_do_not_allow_empty_files;
906 #endif /* WITH_ILP */