2 * Scheduling algorithms.
3 * An ILP scheduler based on
4 * "ILP-based Instruction Scheduling for IA-64"
5 * by Daniel Kaestner and Sebastian Winkel
8 * @author Christian Wuerdig
26 #include "irphase_t.h"
34 #include <lpp/lpp_net.h>
37 #include <libcore/lc_opts.h>
38 #include <libcore/lc_opts_enum.h>
39 #include <libcore/lc_timing.h>
40 #endif /* WITH_LIBCORE */
44 #include "besched_t.h"
/* ILP scheduler options (time limit, log file, ...; fields elided in this excerpt) */
typedef struct _ilpsched_options_t {

/* Pairs an execution unit type with per-type data for one node. */
typedef struct _unit_type_info_t {
	const be_execution_unit_type_t *tp;   /**< the execution unit type */
/* attributes for a node */
typedef struct _ilpsched_node_attr_t {
	unsigned asap;                    /**< The ASAP scheduling control step */
	unsigned alap;                    /**< The ALAP scheduling control step */
	unsigned sched_point;             /**< the step in which the node is finally scheduled */
	unsigned visit_idx;               /**< Index of the node having visited this node last */
	unsigned block_idx : 31;          /**< A unique per block index */
	unsigned enqueued : 1;            /**< Node is already enqueued for ALAP calculation */
	bitset_t *transitive_block_nodes; /**< Set of transitive block nodes (predecessors
	                                       for ASAP, successors for ALAP) */
	unsigned n_unit_types;            /**< number of allowed execution unit types */
	unit_type_info_t *type_info;      /**< list of allowed execution unit types */
	int *ilp_vars;                    /**< the binary ilp variables x_{nt}^k assigned to this node
	                                       (== 1 iff: node n is executed at step t on execunit k
	                                       So we have: |ASAP(n) ... ALAP(n)| * |execunits| variables */
} ilpsched_node_attr_t;
/* attributes for a block */
typedef struct _ilpsched_block_attr_t {
	unsigned block_last_idx;      /**< The highest node index in block so far */
	unsigned n_interesting_nodes; /**< The number of nodes interesting for scheduling */
	waitq *root_nodes;            /**< A queue of nodes having no user in current block */
	ir_node *head_ilp_nodes;      /**< A linked list of nodes which will contribute to ILP */
} ilpsched_block_attr_t;
/* A phase attribute is either a node attribute or a block attribute. */
typedef union _ilpsched_attr_ {
	ilpsched_node_attr_t node_attr;   /**< scheduling data for a regular node */
	ilpsched_block_attr_t block_attr; /**< scheduling data for a block */
/* An irn for the phase and its attributes (either node or block) */

/* The ILP scheduling environment */
	phase_t ph;                 /**< The phase */
	ir_graph *irg;              /**< The current irg */
	waitq *alap_queue;          /**< A queue of nodes waiting for final ALAP calculation */
	const arch_env_t *arch_env;      /* architecture environment of the backend */
	const be_main_env_t *main_env;   /* backend main environment (solver options etc.) */
	const be_machine_t *cpu;    /**< the current abstract machine */
	ilpsched_options_t *opts;   /**< the ilp options for current irg */
	DEBUG_ONLY(firm_dbg_module_t *dbg);
/* convenience macros to handle phase irn data */
#define get_ilpsched_irn(ilpsched_env, irn) (phase_get_or_set_irn_data(&(ilpsched_env)->ph, (irn)))
#define is_ilpsched_block(node)             (is_Block((node)->irn))
#define get_ilpsched_block_attr(block)      (&(block)->attr.block_attr)
#define get_ilpsched_node_attr(node)        (&(node)->attr.node_attr)

/* iterate over a list of ir_nodes linked by link field
   (NOTE: evaluates iter multiple times, do not pass expressions with side effects) */
#define foreach_linked_irns(head, iter) for ((iter) = (head); (iter); (iter) = get_irn_link((iter)))

/* check if node is considered for ILP scheduling */
#define consider_for_sched(irn) \
	(! (is_Block(irn)   ||      \

/* gives the valid scheduling time step interval for a node */
#define VALID_SCHED_INTERVAL(na) ((na)->alap - (na)->asap + 1)

/* gives the corresponding ILP variable for given node, unit and time step
   (control_step is 0-based, i.e. ASAP-1-based, hence the "- asap + 1") */
#define ILPVAR_IDX(na, unit, control_step) \
	((unit) * VALID_SCHED_INTERVAL((na)) + (control_step) - (na)->asap + 1)

/* check if a double value is within an epsilon environment of 0 */
#define LPP_VALUE_IS_0(dbl) (fabs((dbl)) <= 1e-10)
/* option variable */
static ilpsched_options_t ilp_opts = {
	120,   /* 120 sec per block time limit */

/* command line option table registered with libcore */
static const lc_opt_table_entry_t ilpsched_option_table[] = {
	LC_OPT_ENT_INT("time_limit", "ILP time limit per block", &ilp_opts.time_limit),
	LC_OPT_ENT_STR("lpp_log", "LPP logfile (stderr and stdout are supported)", ilp_opts.log_file, sizeof(ilp_opts.log_file)),
#endif /* WITH_LIBCORE */
/**
 * Compare scheduling time steps of two be_ilpsched_irn's.
 * qsort comparator: orders nodes ascending by their final sched_point.
 */
static int cmp_ilpsched_irn(const void *a, const void *b) {
	be_ilpsched_irn_t    *n1   = *(be_ilpsched_irn_t **)a;
	be_ilpsched_irn_t    *n2   = *(be_ilpsched_irn_t **)b;
	ilpsched_node_attr_t *n1_a = get_ilpsched_node_attr(n1);
	ilpsched_node_attr_t *n2_a = get_ilpsched_node_attr(n2);

	return QSORT_CMP(n1_a->sched_point, n2_a->sched_point);
/**
 * In case there is no phase information for irn, initialize it.
 * Phase callback: allocates (or reinitializes) the per-irn attribute.
 */
static void *init_ilpsched_irn(phase_t *ph, ir_node *irn, void *old) {
	be_ilpsched_irn_t *res = old ? old : phase_alloc(ph, sizeof(res[0]));

	/* if we have already some data: check for reinitialization */
	if (! is_Block(irn)) {
		ilpsched_node_attr_t *na = get_ilpsched_node_attr(res);

		if (! na->transitive_block_nodes) {
			ir_node               *block      = get_nodes_block(irn);
			be_ilpsched_irn_t     *block_node = phase_get_or_set_irn_data(ph, block);
			ilpsched_block_attr_t *ba         = get_ilpsched_block_attr(block_node);

			/* we are called after the block indices have been built: create bitset */
			na->transitive_block_nodes = bitset_obstack_alloc(phase_obst(ph), ba->block_last_idx);

			/* we are called from reinit block data: clear the bitset */
			bitset_clear_all(na->transitive_block_nodes);

	/* set ilpsched irn attributes (either block or irn) */
		ilpsched_block_attr_t *ba = get_ilpsched_block_attr(res);

		ba->n_interesting_nodes = 0;
		ba->block_last_idx      = 0;
		ba->root_nodes          = new_waitq();
		ba->head_ilp_nodes      = NULL;

		ilpsched_node_attr_t *na = get_ilpsched_node_attr(res);
		memset(na, 0, sizeof(*na));
/**
 * Assign a per block unique number to each node.
 * Walker callback; the index is later used as bit position in the
 * transitive_block_nodes bitsets.
 */
static void build_block_idx(ir_node *irn, void *walk_env) {
	be_ilpsched_env_t     *env = walk_env;
	be_ilpsched_irn_t     *node, *block_node;
	ilpsched_node_attr_t  *na;
	ilpsched_block_attr_t *ba;

	/* nodes irrelevant for ILP scheduling get no index */
	if (! consider_for_sched(irn))

	node       = get_ilpsched_irn(env, irn);
	na         = get_ilpsched_node_attr(node);
	block_node = get_ilpsched_irn(env, get_nodes_block(irn));
	ba         = get_ilpsched_block_attr(block_node);

	/* hand out the next free per-block index */
	na->block_idx = ba->block_last_idx++;
230 /********************************************************
233 * __ _ ___ __ _ _ __ / / __ _| | __ _ _ __
234 * / _` / __|/ _` | '_ \ / / / _` | |/ _` | '_ \
235 * | (_| \__ \ (_| | |_) | / / | (_| | | (_| | |_) |
236 * \__,_|___/\__,_| .__/ /_/ \__,_|_|\__,_| .__/
239 ********************************************************/
/**
 * Add all nodes having no user in current block to last_nodes list.
 * Walker callback; also counts the block's "interesting" nodes.
 */
static void collect_alap_root_nodes(ir_node *irn, void *walk_env) {
	const ir_edge_t       *edge;
	be_ilpsched_irn_t     *block_node;
	ilpsched_block_attr_t *ba;
	be_ilpsched_env_t     *env = walk_env;
	int                   has_block_user = 0;
	ir_edge_kind_t        ekind[2] = { EDGE_KIND_NORMAL, EDGE_KIND_DEP };

	if (! consider_for_sched(irn))

	block = get_nodes_block(irn);

	DBG((env->dbg, LEVEL_3, "%+F (%+F) is interesting, examining ... ", irn, block));

	/* scan both normal and dependency out edges for users inside this block */
	for (i = 0; i < 2; ++i) {
		foreach_out_edge_kind(irn, edge, ekind[i]) {
			ir_node *user = get_edge_src_irn(edge);

			const ir_edge_t *user_edge;

			/* mode_X users (control flow successors): look through them */
			if (get_irn_mode(user) == mode_X)

			/* The ABI ensures, that there will be no ProjT nodes in the graph. */
			for (j = 0; j < 2; ++j) {
				foreach_out_edge_kind(user, user_edge, ekind[j]) {
					ir_node *real_user = get_edge_src_irn(user_edge);

					/* Phis and Keeps do not count as block users */
					if (! is_Phi(real_user) && ! be_is_Keep(real_user) && get_nodes_block(real_user) == block) {
			else if (is_Block(user)) {
			else if (! is_Phi(user) && ! be_is_Keep(user) && get_nodes_block(user) == block) {

	block_node = get_ilpsched_irn(env, block);
	ba         = get_ilpsched_block_attr(block_node);

	ba->n_interesting_nodes++;

	/* current irn has no user inside this block, add to queue */
	if (! has_block_user) {
		DB((env->dbg, LEVEL_3, "root node\n"));
		waitq_put(ba->root_nodes, irn);
		DB((env->dbg, LEVEL_3, "normal node\n"));
/**
 * Calculate the ASAP scheduling step for current irn.
 * Walker callback; ASAP(n) = number of transitive intra-block
 * predecessors of n (including n itself).
 */
static void calculate_irn_asap(ir_node *irn, void *walk_env) {
	be_ilpsched_irn_t *node;
	be_ilpsched_env_t *env = walk_env;
	ilpsched_node_attr_t *na;

	/* These nodes are handled separate */
	if (! consider_for_sched(irn))

	DBG((env->dbg, LEVEL_2, "Calculating ASAP of node %+F\n", irn));

	node  = get_ilpsched_irn(env, irn);
	block = get_nodes_block(irn);
	na    = get_ilpsched_node_attr(node);

	/* accumulate all transitive predecessors of current node */
	for (i = get_irn_ins_or_deps(irn) - 1; i >= 0; --i) {
		ir_node              *pred = skip_Proj(get_irn_in_or_dep(irn, i));
		be_ilpsched_irn_t    *pred_node;
		ilpsched_node_attr_t *pna;

		/* a Keep pins its operand: use the operand as the real predecessor */
		if (be_is_Keep(pred))
			pred = skip_Proj(get_irn_n(pred, 0));

		/* Phis, NoMem and nodes from other blocks do not constrain ASAP */
		if (is_Phi(pred) || block != get_nodes_block(pred) || is_NoMem(pred))

		pred_node = get_ilpsched_irn(env, pred);
		pna       = get_ilpsched_node_attr(pred_node);
		idx       = get_irn_idx(irn);

		assert(pna->asap && "missing ASAP of predecessor");

		/*
			We have not already visited this predecessor
			-> accumulate its predecessors
		*/
		if (pna->visit_idx != idx) {
			pna->visit_idx = idx;
			na->transitive_block_nodes = bitset_or(na->transitive_block_nodes, pna->transitive_block_nodes);
			DBG((env->dbg, LEVEL_3, "\taccumulating preds of %+F\n", pred));

	/* every node is its own transitive predecessor in block */
	bitset_set(na->transitive_block_nodes, na->block_idx);

	/* asap = number of transitive predecessors in this block */
	na->asap = bitset_popcnt(na->transitive_block_nodes);

	DBG((env->dbg, LEVEL_2, "\tcalculated ASAP is %u\n", na->asap));
/**
 * Accumulate the successors of all nodes from irn on upwards.
 * Propagates the successor bitsets to the predecessors and enqueues
 * every node once for the final ALAP computation.
 */
static void accumulate_succs(be_ilpsched_env_t *env, ir_node *irn) {
	be_ilpsched_irn_t    *node  = get_ilpsched_irn(env, irn);
	ilpsched_node_attr_t *na    = get_ilpsched_node_attr(node);
	ir_node              *block = get_nodes_block(irn);
	waitq                *wq    = new_waitq();

	DBG((env->dbg, LEVEL_3, "\taccumulating succs of %+F\n", irn));

	/* enqueue node for final alap calculation */
	if (! na->enqueued) {
		be_ilpsched_irn_t     *block_node = get_ilpsched_irn(env, block);
		ilpsched_block_attr_t *ba         = get_ilpsched_block_attr(block_node);

		/* ALAP starts pessimistically at the block's node count and is
		   reduced later by the number of accumulated successors */
		na->alap = ba->n_interesting_nodes;
		waitq_put(env->alap_queue, node);

		/* thread the node onto the block's list of ILP-relevant nodes */
		set_irn_link(irn, ba->head_ilp_nodes);
		ba->head_ilp_nodes = irn;
		DBG((env->dbg, LEVEL_5, "\t\tlinked %+F to ilp nodes of %+F, attr %p\n", irn, block, ba));
		DBG((env->dbg, LEVEL_4, "\t\tenqueueing %+F for final ALAP calculation\n", irn));

	for (i = 0, n = get_irn_ins_or_deps(irn); i < n; ++i) {
		ir_node *pred = skip_Proj(get_irn_in_or_dep(irn, i));

		be_ilpsched_irn_t    *pred_node;
		ilpsched_node_attr_t *pna;

		/* a Keep pins its operand: use the operand as the real predecessor */
		if (be_is_Keep(pred))
			pred = skip_Proj(get_irn_n(pred, 0));

		if (is_Phi(pred) || block != get_nodes_block(pred) || is_NoMem(pred))

		pred_node = get_ilpsched_irn(env, pred);
		pna       = get_ilpsched_node_attr(pred_node);
		idx       = get_irn_idx(irn);

		/* accumulate the successors */
		if (pna->visit_idx != idx) {
			pna->visit_idx = idx;
			pna->transitive_block_nodes = bitset_or(pna->transitive_block_nodes, na->transitive_block_nodes);

			/* set current node as successor */
			bitset_set(pna->transitive_block_nodes, na->block_idx);

			DBG((env->dbg, LEVEL_3, "\taccumulating succs of %+F to %+F\n", irn, pred));

	/* process all predecessors */
	while (! waitq_empty(wq)) {
		accumulate_succs(env, waitq_get(wq));
/**
 * Calculate the ALAP scheduling step of all irns in current block.
 * Depends on ASAP being calculated.
 */
static void calculate_block_alap(ir_node *block, void *walk_env) {
	be_ilpsched_env_t     *env        = walk_env;
	be_ilpsched_irn_t     *block_node = get_ilpsched_irn(env, block);
	ilpsched_block_attr_t *ba         = get_ilpsched_block_attr(block_node);

	assert(is_Block(block));

	DBG((env->dbg, LEVEL_2, "Calculating ALAP for nodes in %+F (%u nodes)\n", block, ba->n_interesting_nodes));

	/* TODO: Might be faster to use out edges and call phase_reinit_single_irn_data */
	phase_reinit_block_irn_data(&env->ph, block);

	/* calculate the alap of all nodes, starting at collected roots upwards */
	while (! waitq_empty(ba->root_nodes)) {
		accumulate_succs(env, waitq_get(ba->root_nodes));

	/* we don't need it anymore */
	del_waitq(ba->root_nodes);
	ba->root_nodes = NULL;

	/* all interesting nodes should have their successors accumulated now */
	while (! waitq_empty(env->alap_queue)) {
		be_ilpsched_irn_t    *node = waitq_get(env->alap_queue);
		ilpsched_node_attr_t *na   = get_ilpsched_node_attr(node);

		/* control flow ops must always be scheduled last */
		if (is_cfop(node->irn) && ! is_Start(node->irn) && get_irn_opcode(node->irn) != iro_End)

		/* ALAP = pessimistic start value minus number of transitive successors */
		na->alap -= bitset_popcnt(na->transitive_block_nodes);
		DBG((env->dbg, LEVEL_2, "\tALAP of %+F is %u (%u succs)\n", node->irn, na->alap, bitset_popcnt(na->transitive_block_nodes)));
474 /*************************
482 *************************/
/**
 * Check if node can be executed on given unit type.
 * Scans the node's allowed unit type list for tp.
 */
static INLINE int is_valid_unit_type_for_node(const be_execution_unit_type_t *tp, be_ilpsched_irn_t *node) {
	ilpsched_node_attr_t *na = get_ilpsched_node_attr(node);

	for (i = na->n_unit_types - 1; i >= 0; --i) {
		if (na->type_info[i].tp == tp)
/**
 * Collect all Keep users of irn into the keeps queue, so they can be
 * scheduled right after their keep-alive node.
 */
static INLINE void check_for_keeps(waitq *keeps, ir_node *block, ir_node *irn) {
	const ir_edge_t *edge;

	foreach_out_edge(irn, edge) {
		ir_node *user = get_edge_src_irn(edge);

		if (be_is_Keep(user)) {
			assert(get_nodes_block(user) == block && "Keep must not be in different block.");
			waitq_put(keeps, user);
/**
 * Adds a node, its Projs (in case of mode_T nodes) and
 * its Keeps to schedule.
 */
static void add_to_sched(ir_node *block, ir_node *irn) {
	const ir_edge_t *edge;
	waitq           *keeps = new_waitq();

	/* mode_M nodes are not scheduled */
	if (get_irn_mode(irn) == mode_M)

	sched_add_before(block, irn);

	/* for mode_T nodes, schedule all schedulable Projs as well */
	if (get_irn_mode(irn) == mode_T) {
		foreach_out_edge(irn, edge) {
			ir_node *user = get_edge_src_irn(edge);

			assert(is_Proj(user) && "User of mode_T node must be a Proj");

			if (to_appear_in_schedule(user))
				sched_add_before(block, user);

			check_for_keeps(keeps, block, user);

	check_for_keeps(keeps, block, irn);

	/* schedule all collected, not yet scheduled Keeps */
	while (! waitq_empty(keeps)) {
		ir_node *keep = waitq_get(keeps);
		if (! sched_is_scheduled(keep))
			sched_add_before(block, keep);
/**
 * Schedule all nodes in the given block, according to the ILP solution.
 */
static void apply_solution(be_ilpsched_env_t *env, lpp_t *lpp, ir_node *block) {
	be_ilpsched_irn_t     *block_node = get_ilpsched_irn(env, block);
	ilpsched_block_attr_t *ba         = get_ilpsched_block_attr(block_node);
	sched_info_t          *info       = get_irn_sched_info(block);
	be_ilpsched_irn_t     **sched_nodes;
	const ir_edge_t       *edge;

	/* init block schedule list */
	INIT_LIST_HEAD(&info->list);

	/* collect nodes and their scheduling time step */
	sched_nodes = NEW_ARR_F(be_ilpsched_irn_t *, 0);
	if (ba->n_interesting_nodes == 0) {
	else if (ba->n_interesting_nodes == 1) {
		be_ilpsched_irn_t *node = get_ilpsched_irn(env, ba->head_ilp_nodes);

		/* add the single node */
		ARR_APP1(be_ilpsched_irn_t *, sched_nodes, node);

		foreach_linked_irns(ba->head_ilp_nodes, irn) {
			be_ilpsched_irn_t    *node;
			ilpsched_node_attr_t *na;

			node = get_ilpsched_irn(env, irn);
			na   = get_ilpsched_node_attr(node);

			/* find the (unit type, time step) whose binary variable is set */
			for (tp_idx = na->n_unit_types - 1; ! found && tp_idx >= 0; --tp_idx) {
				for (t = na->asap - 1; ! found && t <= na->alap - 1; ++t) {
					double val = lpp_get_var_sol(lpp, na->ilp_vars[cur_var++]);
					if (! LPP_VALUE_IS_0(val)) {
						ARR_APP1(be_ilpsched_irn_t *, sched_nodes, node);
						DBG((env->dbg, LEVEL_1, "Schedpoint of %+F is %u at unit type %s\n",
							irn, t, na->type_info[tp_idx].tp->name));

	/* sort nodes ascending by scheduling time step */
	qsort(sched_nodes, ARR_LEN(sched_nodes), sizeof(sched_nodes[0]), cmp_ilpsched_irn);

	/* make all Phis ready and remember the single cf op */
	foreach_out_edge(block, edge) {
		irn = get_edge_src_irn(edge);

		switch (get_irn_opcode(irn)) {
				add_to_sched(block, irn);
				/* a block must have at most one control flow op */
				assert(cfop == NULL && "Highlander!");

	/* add all nodes from list */
	for (i = 0, l = ARR_LEN(sched_nodes); i < l; ++i) {
		add_to_sched(block, sched_nodes[i]->irn);

	/* schedule control flow node if not already done */
	if (cfop && ! sched_is_scheduled(cfop))
		add_to_sched(block, cfop);

	DEL_ARR_F(sched_nodes);
/**
 * Create the ilp (add variables, build constraints, solve, build schedule from solution).
 * Block walker callback; one ILP is built and solved per block.
 */
static void create_ilp(ir_node *block, void *walk_env) {
	be_ilpsched_env_t     *env           = walk_env;
	be_ilpsched_irn_t     *block_node    = get_ilpsched_irn(env, block);
	ilpsched_block_attr_t *ba            = get_ilpsched_block_attr(block_node);
	unsigned              num_block_var  = 0;
	unsigned              num_nodes      = 0;
	unsigned              n_instr_max    = env->cpu->bundle_size * env->cpu->bundels_per_cycle;
	bitset_t              *bs_block_irns = bitset_alloca(ba->block_last_idx);
	FILE                  *logfile       = NULL;
	unsigned              num_cst_assign, num_cst_prec, num_cst_resrc, num_cst_bundle;
	struct obstack        var_obst;
	lc_timer_t            *t_cst_assign;
	lc_timer_t            *t_cst_prec;
	lc_timer_t            *t_cst_rsrc;
	lc_timer_t            *t_cst_bundle;
	double                fact_var, fact_cst;

	/* register and reset the statistic timers */
	t_var        = lc_timer_register("beilpsched_var", "create ilp variables");
	t_cst_assign = lc_timer_register("beilpsched_cst_assign", "create assignment constraints");
	t_cst_prec   = lc_timer_register("beilpsched_cst_prec", "create precedence constraints");
	t_cst_rsrc   = lc_timer_register("beilpsched_cst_rsrc", "create resource constraints");
	t_cst_bundle = lc_timer_register("beilpsched_cst_bundle", "create bundle constraints");
	LC_STOP_AND_RESET_TIMER(t_var);
	LC_STOP_AND_RESET_TIMER(t_cst_assign);
	LC_STOP_AND_RESET_TIMER(t_cst_prec);
	LC_STOP_AND_RESET_TIMER(t_cst_rsrc);
	LC_STOP_AND_RESET_TIMER(t_cst_bundle);
#endif /* WITH_LIBCORE */

	DBG((env->dbg, 255, "\n\n\n=========================================\n"));
	DBG((env->dbg, 255, " ILP Scheduling for %+F\n", block));
	DBG((env->dbg, 255, "=========================================\n\n"));

	DBG((env->dbg, LEVEL_1, "Creating ILP Variables for nodes in %+F (%u interesting nodes)\n",
		block, ba->n_interesting_nodes));

	/* blocks with at most one interesting node need no ILP */
	if (ba->n_interesting_nodes > 1) {
		/* heuristic initial capacities for the LPP matrices */
		fact_var = ba->n_interesting_nodes < 25 ? 1.0 : 0.6;
		fact_cst = ba->n_interesting_nodes < 25 ? 0.8 : 0.6;

		lpp = new_lpp_userdef(
			(int)((double)(ba->n_interesting_nodes * ba->n_interesting_nodes) * fact_var) + 1,  /* num vars */
			(int)((double)(ba->n_interesting_nodes * ba->n_interesting_nodes) * fact_cst) + 20, /* num cst */
			1.2);                                                                               /* grow factor */
		obstack_init(&var_obst);

		lc_timer_push(t_var);
		foreach_linked_irns(ba->head_ilp_nodes, irn) {
			const be_execution_unit_t ***execunits = arch_isa_get_allowed_execution_units(env->arch_env->isa, irn);
			be_ilpsched_irn_t    *node;
			ilpsched_node_attr_t *na;
			unsigned             n_unit_types, tp_idx, unit_idx, cur_var, n_var, cur_unit;

			/* count number of available unit types for this node */
			for (n_unit_types = 0; execunits[n_unit_types]; ++n_unit_types)

			node = get_ilpsched_irn(env, irn);
			na   = get_ilpsched_node_attr(node);

			na->n_unit_types = n_unit_types;
			na->type_info    = NEW_ARR_D(unit_type_info_t, &var_obst, n_unit_types);

			/* fill the type info array */
			for (tp_idx = 0; tp_idx < n_unit_types; ++tp_idx) {
				for (unit_idx = 0; execunits[tp_idx][unit_idx]; ++unit_idx)
					/* just count units of this type */;

				na->type_info[tp_idx].tp      = execunits[tp_idx][0]->tp;
				na->type_info[tp_idx].n_units = unit_idx;

			/* allocate space for ilp variables */
			na->ilp_vars = NEW_ARR_D(int, &var_obst, n_unit_types * VALID_SCHED_INTERVAL(na));
			memset(na->ilp_vars, -1, n_unit_types * VALID_SCHED_INTERVAL(na) * sizeof(na->ilp_vars[0]));

			DBG((env->dbg, LEVEL_3, "\thandling %+F (asap %u, alap %u, unit types %u):\n",
				irn, na->asap, na->alap, na->n_unit_types));

			cur_var = cur_unit = n_var = 0;
			/* create variables: one binary x_{nt}^k per (unit type, time step) */
			for (tp_idx = 0; tp_idx < n_unit_types; ++tp_idx) {
				for (t = na->asap - 1; t <= na->alap - 1; ++t) {
					snprintf(buf, sizeof(buf), "n%u_%s_%u",
						get_irn_idx(irn), na->type_info[tp_idx].tp->name, t);
					na->ilp_vars[cur_var++] = lpp_add_var(lpp, buf, lpp_binary, (double)(t + 1));

					DBG((env->dbg, LEVEL_4, "\t\tcreated ILP variable %s\n", buf));

			DB((env->dbg, LEVEL_3, "%u variables created\n", n_var));

		DBG((env->dbg, LEVEL_1, "... %u variables for %u nodes created (%g sec)\n",
			num_block_var, num_nodes, lc_timer_elapsed_usec(t_var) / 1000000.0));
	/*
		- the assignment constraints:
			assure each node is executed once by exactly one (allowed) execution unit
		- the precedence constraints:
			assure that no data dependencies are violated
	*/
	num_cst_assign = num_cst_prec = num_cst_resrc = num_cst_bundle = 0;
	DBG((env->dbg, LEVEL_1, "Creating constraints for nodes in %+F:\n", block));
	foreach_linked_irns(ba->head_ilp_nodes, irn) {
		be_ilpsched_irn_t    *node;
		ilpsched_node_attr_t *na;

		/* the assignment constraint */
		lc_timer_push(t_cst_assign);
		snprintf(buf, sizeof(buf), "assignment_cst_n%u", get_irn_idx(irn));
		cst = lpp_add_cst_uniq(lpp, buf, lpp_equal, 1.0);
		DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));

		node = get_ilpsched_irn(env, irn);
		na   = get_ilpsched_node_attr(node);

		/* all variables of one node sum up to exactly 1 */
		lpp_set_factor_fast_bulk(lpp, cst, na->ilp_vars, na->n_unit_types * VALID_SCHED_INTERVAL(na), 1.0);

		/* the precedence constraints */
		lc_timer_push(t_cst_prec);
		bs_block_irns = bitset_clear_all(bs_block_irns);
		for (i = get_irn_ins_or_deps(irn) - 1; i >= 0; --i) {
			ir_node *pred = skip_Proj(get_irn_in_or_dep(irn, i));
			unsigned t_low, t_high;
			be_ilpsched_irn_t    *pred_node;
			ilpsched_node_attr_t *pna;

			/* a Keep pins its operand: use the operand as the real predecessor */
			if (be_is_Keep(pred))
				pred = skip_Proj(get_irn_n(pred, 0));

			if (is_Phi(pred) || block != get_nodes_block(pred) || is_NoMem(pred))

			pred_node = get_ilpsched_irn(env, pred);
			pna       = get_ilpsched_node_attr(pred_node);

			assert(pna->asap > 0 && pna->alap >= pna->asap && "Invalid scheduling interval.");

			/* handle each predecessor only once */
			if (! bitset_is_set(bs_block_irns, pna->block_idx))
				bitset_set(bs_block_irns, pna->block_idx);

			/* irn = n, pred = m */
			t_low  = MAX(na->asap, pna->asap);
			t_high = MIN(na->alap, pna->alap);
			for (t = t_low - 1; t <= t_high - 1; ++t) {
				int int_na       = (t >= na->asap - 1) ? t - na->asap + 2 : 0;
				int int_pna      = (t < pna->alap) ? pna->alap - t : 0;
				int *tmp_var_idx = NEW_ARR_F(int, int_na * na->n_unit_types + int_pna * pna->n_unit_types);

				snprintf(buf, sizeof(buf), "precedence_n%u_n%u_%u", get_irn_idx(pred), get_irn_idx(irn), t);
				cst = lpp_add_cst_uniq(lpp, buf, lpp_less, 1.0);
				DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));

				/* lpp_set_factor_fast_bulk needs variables sorted ascending by index */
				if (na->ilp_vars[0] < pna->ilp_vars[0]) {
					/* node variables have smaller index than pred variables */
					for (tp_idx = na->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
						for (tn = na->asap - 1; tn <= t; ++tn) {
							unsigned idx = ILPVAR_IDX(na, tp_idx, tn);
							tmp_var_idx[cur_idx++] = na->ilp_vars[idx];

					for (tp_idx = pna->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
						for (tm = t; tm < pna->alap; ++tm) {
							unsigned idx = ILPVAR_IDX(pna, tp_idx, tm);
							tmp_var_idx[cur_idx++] = pna->ilp_vars[idx];

					/* pred variables have smaller index than node variables */
					for (tp_idx = pna->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
						for (tm = t; tm < pna->alap; ++tm) {
							unsigned idx = ILPVAR_IDX(pna, tp_idx, tm);
							tmp_var_idx[cur_idx++] = pna->ilp_vars[idx];

					for (tp_idx = na->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
						for (tn = na->asap - 1; tn <= t; ++tn) {
							unsigned idx = ILPVAR_IDX(na, tp_idx, tn);
							tmp_var_idx[cur_idx++] = na->ilp_vars[idx];

				lpp_set_factor_fast_bulk(lpp, cst, tmp_var_idx, cur_idx, 1.0);

				DEL_ARR_F(tmp_var_idx);

	DBG((env->dbg, LEVEL_1, "\t%u assignement constraints (%g sec)\n",
		num_cst_assign, lc_timer_elapsed_usec(t_cst_assign) / 1000000.0));
	DBG((env->dbg, LEVEL_1, "\t%u precedence constraints (%g sec)\n",
		num_cst_prec, lc_timer_elapsed_usec(t_cst_prec) / 1000000.0));
	/*
		2nd: the resource constraints:
		assure that for each time step not more instructions are scheduled
		to the same unit types as units of this type are available
	*/
	lc_timer_push(t_cst_rsrc);
	for (glob_type_idx = env->cpu->n_unit_types - 1; glob_type_idx >= 0; --glob_type_idx) {
		be_execution_unit_type_t *cur_tp = &env->cpu->unit_types[glob_type_idx];

		for (t = 0; t < ba->n_interesting_nodes; ++t) {
			int *tmp_var_idx = NEW_ARR_F(int, ba->n_interesting_nodes);

			snprintf(buf, sizeof(buf), "resource_cst_%s_%u", cur_tp->name, t);
			cst = lpp_add_cst_uniq(lpp, buf, lpp_less, (double)cur_tp->n_units);
			DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));

			/* collect variables of all nodes runnable on this unit type at step t */
			foreach_linked_irns(ba->head_ilp_nodes, irn) {
				be_ilpsched_irn_t    *node = get_ilpsched_irn(env, irn);
				ilpsched_node_attr_t *na   = get_ilpsched_node_attr(node);

				tp_idx = is_valid_unit_type_for_node(cur_tp, node);

				if (tp_idx >= 0 && t >= na->asap - 1 && t <= na->alap - 1) {
					int cur_var = ILPVAR_IDX(na, tp_idx, t);
					tmp_var_idx[cur_idx++] = na->ilp_vars[cur_var];

			lpp_set_factor_fast_bulk(lpp, cst, tmp_var_idx, cur_idx, 1.0);

			DEL_ARR_F(tmp_var_idx);

	DBG((env->dbg, LEVEL_1, "\t%u resource constraints (%g sec)\n",
		num_cst_resrc, lc_timer_elapsed_usec(t_cst_rsrc) / 1000000.0));

	/*
		3rd: bundle constraints:
		assure, at most bundle_size * bundles_per_cycle instructions
		can be started at a certain point.
	*/
	lc_timer_push(t_cst_bundle);
	for (t = 0; t < ba->n_interesting_nodes; ++t) {
		int *tmp_var_idx = NEW_ARR_F(int, 0);

		snprintf(buf, sizeof(buf), "bundle_cst_%u", t);
		cst = lpp_add_cst_uniq(lpp, buf, lpp_less, (double)n_instr_max);
		DBG((env->dbg, LEVEL_2, "added constraint %s\n", buf));

		/* sum over all unit types of all nodes schedulable at step t */
		foreach_linked_irns(ba->head_ilp_nodes, irn) {
			be_ilpsched_irn_t    *node;
			ilpsched_node_attr_t *na;

			node = get_ilpsched_irn(env, irn);
			na   = get_ilpsched_node_attr(node);

			if (t >= na->asap - 1 && t <= na->alap - 1) {
				for (tp_idx = na->n_unit_types - 1; tp_idx >= 0; --tp_idx) {
					int idx = ILPVAR_IDX(na, tp_idx, t);
					ARR_APP1(int, tmp_var_idx, na->ilp_vars[idx]);

		if (ARR_LEN(tmp_var_idx) > 0)
			lpp_set_factor_fast_bulk(lpp, cst, tmp_var_idx, ARR_LEN(tmp_var_idx), 1.0);

		DEL_ARR_F(tmp_var_idx);

	DBG((env->dbg, LEVEL_1, "\t%u bundle constraints (%g sec)\n",
		num_cst_bundle, lc_timer_elapsed_usec(t_cst_bundle) / 1000000.0));
954 DBG((env->dbg, LEVEL_1, "ILP to solve: %u variables, %u constraints\n", lpp->var_next, lpp->cst_next));
956 /* debug stuff, dump lpp when debugging is on */
958 if (firm_dbg_get_mask(env->dbg) > 0) {
961 snprintf(buf, sizeof(buf), "lpp_block_%lu.txt", get_irn_node_nr(block));
963 lpp_dump_plain(lpp, f);
965 snprintf(buf, sizeof(buf), "lpp_block_%lu.mps", get_irn_node_nr(block));
966 lpp_dump(lpp, "buf");
970 /* set solve time limit */
971 lpp_set_time_limit(lpp, env->opts->time_limit);
973 /* set logfile if requested */
974 if (strlen(env->opts->log_file) > 0) {
975 if (strcasecmp(env->opts->log_file, "stdout") == 0)
976 lpp_set_log(lpp, stdout);
977 else if (strcasecmp(env->opts->log_file, "stderr") == 0)
978 lpp_set_log(lpp, stderr);
980 logfile = fopen(env->opts->log_file, "w");
982 fprintf(stderr, "Could not open logfile '%s'! Logging disabled.\n", env->opts->log_file);
984 lpp_set_log(lpp, logfile);
989 lpp_solve_net(lpp, env->main_env->options->ilp_server, env->main_env->options->ilp_solver);
994 /* check for valid solution */
995 if (! lpp_is_sol_valid(lpp)) {
998 snprintf(buf, sizeof(buf), "lpp_block_%lu.assert.txt", get_irn_node_nr(block));
1000 lpp_dump_plain(lpp, f);
1002 snprintf(buf, sizeof(buf), "lpp_block_%lu.assert.mps", get_irn_node_nr(block));
1004 dump_ir_block_graph(env->irg, "-assert");
1006 assert(0 && "ILP solution is not feasible!");
1009 DBG((env->dbg, LEVEL_1, "\nSolution:\n"));
1010 DBG((env->dbg, LEVEL_1, "\tsend time: %g sec\n", lpp->send_time / 1000000.0));
1011 DBG((env->dbg, LEVEL_1, "\treceive time: %g sec\n", lpp->recv_time / 1000000.0));
1012 DBG((env->dbg, LEVEL_1, "\titerations: %d\n", lpp->iterations));
1013 DBG((env->dbg, LEVEL_1, "\tsolution time: %g\n", lpp->sol_time));
1014 DBG((env->dbg, LEVEL_1, "\tobjective function: %g\n", LPP_VALUE_IS_0(lpp->objval) ? 0.0 : lpp->objval));
1015 DBG((env->dbg, LEVEL_1, "\tbest bound: %g\n", LPP_VALUE_IS_0(lpp->best_bound) ? 0.0 : lpp->best_bound));
1017 obstack_free(&var_obst, NULL);
1020 /* apply solution */
1021 apply_solution(env, lpp, block);
/**
 * Perform ILP scheduling on the given irg.
 * Driver: builds per-block indices, computes ASAP/ALAP, then builds and
 * solves one ILP per block and applies the resulting schedule.
 */
void be_ilp_sched(const be_irg_t *birg) {
	be_ilpsched_env_t env;
	const char        *name = "be ilp scheduling";

	FIRM_DBG_REGISTER(env.dbg, "firm.be.sched.ilp");

	//firm_dbg_set_mask(env.dbg, 31);

	env.irg        = birg->irg;
	env.main_env   = birg->main_env;
	env.alap_queue = new_waitq();
	env.arch_env   = birg->main_env->arch_env;
	env.cpu        = arch_isa_get_machine(birg->main_env->arch_env->isa);
	env.opts       = &ilp_opts;
	phase_init(&env.ph, name, env.irg, PHASE_DEFAULT_GROWTH, init_ilpsched_irn);

	/* assign a per-block unique index to each interesting node */
	irg_walk_in_or_dep_graph(env.irg, NULL, build_block_idx, &env);

	/*
		The block indices are completely built after the walk,
		now we can allocate the bitsets (size depends on block indices)
	*/
	phase_reinit_irn_data(&env.ph);

	/* Collect all root nodes (having no user in their block) and calculate ASAP. */
	irg_walk_in_or_dep_blkwise_graph(env.irg, collect_alap_root_nodes, calculate_irn_asap, &env);

	/* calculate ALAP and create variables */
	irg_block_walk_graph(env.irg, calculate_block_alap, create_ilp, &env);

	/* print phase memory statistics when debugging is enabled */
	if (firm_dbg_get_mask(env.dbg)) {
		phase_stat_t *stat_ptr = phase_stat(&env.ph, &stat);

		fprintf(stderr, "Phase used: %u bytes\n", stat_ptr->overall_bytes);

	/* free all allocated objects */
	del_waitq(env.alap_queue);
	phase_free(&env.ph);
/**
 * Register ILP scheduler options.
 * Adds the option table to the "ilpsched" group (guarded to run only once).
 */
void ilpsched_register_options(lc_opt_entry_t *grp) {
	static int run_once = 0;
	lc_opt_entry_t *sched_grp;

	sched_grp = lc_opt_get_grp(grp, "ilpsched");

	lc_opt_add_table(sched_grp, ilpsched_option_table);
#endif /* WITH_LIBCORE */
#else /* WITH_ILP */

/* A C translation unit must not be empty; provide a dummy symbol
   when ILP support is compiled out. */
static int some_picky_compiler_do_not_allow_empty_files;

#endif /* WITH_ILP */