/**
 * Implements a trace scheduler as presented in Muchnik[TM].
 * Originally implemented by Michael Beck.
 * @author Christian Wuerdig
 */
#include <stdlib.h>

#include "iredges_t.h"

#include "besched_t.h"
#include "belistsched.h"
#include "benode_t.h"
/* we need a special mark */
static char _mark;
#define MARK &_mark
typedef struct _trace_irn {
    sched_timestep_t delay;      /**< The delay for this node if already calculated, else 0. */
    sched_timestep_t etime;      /**< The earliest time of this node. */
    unsigned num_user;           /**< The number of real users (mode datab) of this node */
    int      reg_diff;           /**< The difference num(out registers) - num(in registers) */
    int      preorder;           /**< The pre-order position */
    unsigned critical_path_len;  /**< The weighted length of the longest critical path */
    unsigned is_root : 1;        /**< is a root node of a block */
} trace_irn_t;
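/* Summary of how these per-node attributes interact below: delay and
 * etime implement the delay/earliest-time scheme from Muchnik[TM]
 * (computed in trace_preprocess_block() and trace_node_ready()), while
 * num_user, reg_diff, preorder and critical_path_len feed the priority
 * function of heuristic_select(). */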
typedef struct _trace_env {
    trace_irn_t      *sched_info;           /**< trace scheduling information about the nodes */
    const arch_env_t *arch_env;             /**< the arch environment */
    sched_timestep_t curr_time;             /**< current time of the scheduler */
    void             *selector_env;         /**< the backend selector environment */
    const list_sched_selector_t *selector;  /**< the actual backend selector */
    be_lv_t          *liveness;             /**< The liveness for the irg */
    DEBUG_ONLY(firm_dbg_module_t *dbg;)
} trace_env_t;
/**
 * Returns an arbitrary node from a nodeset.
 */
static ir_node *get_nodeset_node(const ir_nodeset_t *nodeset)
{
    ir_nodeset_iterator_t iter;

    ir_nodeset_iterator_init(&iter, nodeset);
    return ir_nodeset_iterator_next(&iter);
}
/**
 * Returns non-zero if the node is a root node.
 */
static INLINE unsigned is_root_node(trace_env_t *env, ir_node *n)
{
    int idx = get_irn_idx(n);

    assert(idx < ARR_LEN(env->sched_info));
    return env->sched_info[idx].is_root;
}

/**
 * Mark a node as root node.
 */
static INLINE void mark_root_node(trace_env_t *env, ir_node *n)
{
    int idx = get_irn_idx(n);

    assert(idx < ARR_LEN(env->sched_info));
    env->sched_info[idx].is_root = 1;
}
/**
 * Get the current delay.
 */
static INLINE sched_timestep_t get_irn_delay(trace_env_t *env, ir_node *n) {
    int idx = get_irn_idx(n);

    assert(idx < ARR_LEN(env->sched_info));
    return env->sched_info[idx].delay;
}

/**
 * Set the current delay.
 */
static INLINE void set_irn_delay(trace_env_t *env, ir_node *n, sched_timestep_t delay) {
    int idx = get_irn_idx(n);

    assert(idx < ARR_LEN(env->sched_info));
    env->sched_info[idx].delay = delay;
}

/**
 * Get the current etime.
 */
static INLINE sched_timestep_t get_irn_etime(trace_env_t *env, ir_node *n) {
    int idx = get_irn_idx(n);

    assert(idx < ARR_LEN(env->sched_info));
    return env->sched_info[idx].etime;
}

/**
 * Set the current etime.
 */
static INLINE void set_irn_etime(trace_env_t *env, ir_node *n, sched_timestep_t etime) {
    int idx = get_irn_idx(n);

    assert(idx < ARR_LEN(env->sched_info));
    env->sched_info[idx].etime = etime;
}
/**
 * Get the number of users.
 */
static INLINE unsigned get_irn_num_user(trace_env_t *env, ir_node *n) {
    int idx = get_irn_idx(n);

    assert(idx < ARR_LEN(env->sched_info));
    return env->sched_info[idx].num_user;
}

/**
 * Set the number of users.
 */
static INLINE void set_irn_num_user(trace_env_t *env, ir_node *n, unsigned num_user) {
    int idx = get_irn_idx(n);

    assert(idx < ARR_LEN(env->sched_info));
    env->sched_info[idx].num_user = num_user;
}

/**
 * Get the register difference.
 */
static INLINE int get_irn_reg_diff(trace_env_t *env, ir_node *n) {
    int idx = get_irn_idx(n);

    assert(idx < ARR_LEN(env->sched_info));
    return env->sched_info[idx].reg_diff;
}

/**
 * Set the register difference.
 */
static INLINE void set_irn_reg_diff(trace_env_t *env, ir_node *n, int reg_diff) {
    int idx = get_irn_idx(n);

    assert(idx < ARR_LEN(env->sched_info));
    env->sched_info[idx].reg_diff = reg_diff;
}
/**
 * Get the pre-order position.
 */
static INLINE int get_irn_preorder(trace_env_t *env, ir_node *n) {
    int idx = get_irn_idx(n);

    assert(idx < ARR_LEN(env->sched_info));
    return env->sched_info[idx].preorder;
}

/**
 * Set the pre-order position.
 */
static INLINE void set_irn_preorder(trace_env_t *env, ir_node *n, int pos) {
    int idx = get_irn_idx(n);

    assert(idx < ARR_LEN(env->sched_info));
    env->sched_info[idx].preorder = pos;
}
/**
 * Get the critical path length.
 */
static INLINE unsigned get_irn_critical_path_len(trace_env_t *env, ir_node *n) {
    int idx = get_irn_idx(n);

    assert(idx < ARR_LEN(env->sched_info));
    return env->sched_info[idx].critical_path_len;
}

/**
 * Set the critical path length.
 */
static INLINE void set_irn_critical_path_len(trace_env_t *env, ir_node *n, unsigned len) {
    int idx = get_irn_idx(n);

    assert(idx < ARR_LEN(env->sched_info));
    env->sched_info[idx].critical_path_len = len;
}
/**
 * Returns the exec-time for node n.
 */
static sched_timestep_t exectime(trace_env_t *env, ir_node *n) {
    if (be_is_Keep(n) || is_Proj(n))
        return 0;
    if (env->selector->exectime)
        return env->selector->exectime(env->selector_env, n);
    return 1;
}
/**
 * Calculates the latency between two ops.
 */
static sched_timestep_t latency(trace_env_t *env, ir_node *pred, int pred_cycle, ir_node *curr, int curr_cycle) {
    /* a Keep hides a root */
    if (be_is_Keep(curr))
        return exectime(env, pred);

    /* Projs are executed immediately */
    if (is_Proj(curr))
        return 0;

    /* a predecessor Proj must be skipped */
    if (is_Proj(pred))
        pred = get_Proj_pred(pred);

    if (env->selector->latency)
        return env->selector->latency(env->selector_env, pred, pred_cycle, curr, curr_cycle);
    return 1;
}
/**
 * Returns the number of users of a node having mode datab.
 */
static int get_num_successors(ir_node *irn) {
    int sum = 0;
    const ir_edge_t *edge;

    if (get_irn_mode(irn) == mode_T) {
        /* for mode_T nodes: count the users of all Projs */
        foreach_out_edge(irn, edge) {
            ir_node *proj = get_edge_src_irn(edge);
            ir_mode *mode = get_irn_mode(proj);

            if (mode == mode_T)
                sum += get_num_successors(proj);
            else if (mode_is_datab(mode))
                sum += get_irn_n_edges(proj);
        }
    }
    else {
        /* do not count keep-alive edges */
        foreach_out_edge(irn, edge) {
            if (get_irn_opcode(get_edge_src_irn(edge)) != iro_End)
                sum++;
        }
    }

    return sum;
}
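/* Example: a mode_T Load produces a Proj M (memory, not counted by
 * mode_is_datab) and a data Proj for the loaded value; only the users
 * of the data Proj contribute to the count. */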
/**
 * Returns the difference regs_output - regs_input.
 */
static int get_reg_difference(trace_env_t *env, ir_node *irn) {
    int num_out = 0;
    int num_in  = 0;
    int i;
    ir_node *block = get_nodes_block(irn);

    if (be_is_Call(irn)) {
        /* we want calls preferred */
        return -5;
    }

    if (get_irn_mode(irn) == mode_T) {
        /* mode_T nodes: num out regs == num Projs with mode datab */
        const ir_edge_t *edge;
        foreach_out_edge(irn, edge) {
            ir_node *proj = get_edge_src_irn(edge);
            if (mode_is_datab(get_irn_mode(proj)))
                num_out++;
        }
    }
    else
        num_out = 1;

    /* num in regs: number of ins with mode datab and not ignore */
    for (i = get_irn_arity(irn) - 1; i >= 0; i--) {
        ir_node *in = get_irn_n(irn, i);

        if (! be_is_live_end(env->liveness, block, in) &&  /* if the value lives outside of block: do not count */
            mode_is_datab(get_irn_mode(in))            &&  /* must be a data node */
            ! arch_irn_is(env->arch_env, in, ignore))      /* ignore "ignore" nodes :) */
            num_in++;
    }

    return num_out - num_in;
}
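/* Example: a two-operand add whose operands die in this block yields
 * reg_diff = 1 - 2 = -1, i.e. scheduling it frees a register, while a
 * constant with no data inputs yields 1 - 0 = 1, i.e. it raises the
 * pressure. heuristic_select() below rewards negative values. */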
/**
 * Descend into a DAG and create a pre-order list.
 */
static void descent(ir_node *root, ir_node *block, ir_node **list, trace_env_t *env, unsigned path_len) {
    int i;

    if (! is_Phi(root)) {
        path_len += exectime(env, root);
        if (get_irn_critical_path_len(env, root) < path_len) {
            set_irn_critical_path_len(env, root, path_len);
        }

        /* calculate number of users (needed for heuristic) */
        set_irn_num_user(env, root, get_num_successors(root));

        /* calculate register difference (needed for heuristic) */
        set_irn_reg_diff(env, root, get_reg_difference(env, root));

        /* Phi nodes always leave the block */
        for (i = get_irn_arity(root) - 1; i >= 0; --i) {
            ir_node *pred = get_irn_n(root, i);

            DBG((env->dbg, LEVEL_3, " node %+F\n", pred));

            /* Blocks may happen as predecessors of End nodes */
            if (is_Block(pred))
                continue;

            /* already seen nodes are not marked */
            if (get_irn_link(pred) != MARK)
                continue;

            /* don't leave our block */
            if (get_nodes_block(pred) != block)
                continue;

            set_irn_link(pred, NULL);

            descent(pred, block, list, env, path_len);
        }
    }
    set_irn_link(root, *list);
    *list = root;
}
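/* The pre-order list is threaded through the nodes' link fields: each
 * finished node is prepended to *list, so no extra memory is needed.
 * MARK tags nodes not yet visited; the link is cleared before the
 * recursion, so every node is entered at most once. */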
/**
 * Returns non-zero if root is a root in the given block.
 */
static int is_root(ir_node *root, ir_node *block) {
    const ir_edge_t *edge;

    foreach_out_edge(root, edge) {
        ir_node *succ = get_edge_src_irn(edge);

        if (is_Block(succ))
            continue;
        /* Phi nodes are always in "another" block */
        if (is_Phi(succ))
            continue;
        if (get_nodes_block(succ) == block)
            return 0;
    }
    return 1;
}
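/* A node is a root if none of its values is used inside its own block
 * (Phi users count as uses in another block). No intra-block user
 * waits on a root, so it may be scheduled arbitrarily late; the roots
 * seed the DAG walks in trace_preprocess_block() below. */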
/**
 * Performs initial block calculations for trace scheduling.
 */
static void trace_preprocess_block(trace_env_t *env, ir_node *block) {
    ir_node *root = NULL, *preord = NULL;
    ir_node *curr, *irn;
    int cur_pos;
    const ir_edge_t *edge;

    /* First step: Find the root set. */
    foreach_out_edge(block, edge) {
        ir_node *succ = get_edge_src_irn(edge);

        if (is_root(succ, block)) {
            mark_root_node(env, succ);
            set_irn_link(succ, root);
            root = succ;
        }
        else
            set_irn_link(succ, MARK);
    }

    /* Second step: calculate the pre-order list. */
    for (curr = root; curr; curr = irn) {
        irn = get_irn_link(curr);
        DBG((env->dbg, LEVEL_2, " DAG root %+F\n", curr));
        descent(curr, block, &preord, env, 0);
    }
    root = preord;

    /* Third step: calculate the Delay. Note that our
     * list is now in pre-order, starting at root.
     */
    for (cur_pos = 0, curr = root; curr; curr = get_irn_link(curr), cur_pos++) {
        sched_timestep_t d;

        if (arch_irn_class_is(env->arch_env, curr, branch)) {
            /* assure that branches can be executed last */
            d = 0;
        }
        else {
            if (is_root_node(env, curr))
                d = exectime(env, curr);
            else {
                d = 0;
                foreach_out_edge(curr, edge) {
                    ir_node *n = get_edge_src_irn(edge);

                    if (get_nodes_block(n) == block) {
                        sched_timestep_t ld;

                        ld = latency(env, curr, 1, n, 0) + get_irn_delay(env, n);
                        d = ld > d ? ld : d;
                    }
                }
            }
        }
        set_irn_delay(env, curr, d);
        DB((env->dbg, LEVEL_2, "\t%+F delay %u\n", curr, d));

        /* set the etime of all nodes to 0 */
        set_irn_etime(env, curr, 0);

        set_irn_preorder(env, curr, cur_pos);
    }
}
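/* The delay computed above follows the recurrence
 *   delay(n) = exectime(n)                          if n is a root
 *   delay(n) = max over users u in the block of
 *              latency(n, u) + delay(u)             otherwise
 * which works because the pre-order list visits every user of n before
 * n itself. Branches get delay 0 so that they sort last. */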
/**
 * This function gets called after a node finally has been made ready.
 */
static void trace_node_ready(void *data, ir_node *irn, ir_node *pred) {
    trace_env_t *env = data;
    sched_timestep_t etime_p, etime;

    etime = env->curr_time;
    if (pred) {
        etime_p = get_irn_etime(env, pred);
        etime  += latency(env, pred, 1, irn, 0);
        etime   = etime_p > etime ? etime_p : etime;
    }

    set_irn_etime(env, irn, etime);
    DB((env->dbg, LEVEL_2, "\tset etime of %+F to %u\n", irn, etime));
}
/**
 * Update the current time after irn has been selected.
 */
static void trace_update_time(void *data, ir_node *irn) {
    trace_env_t *env = data;
    if (is_Phi(irn) || get_irn_opcode(irn) == iro_Start) {
        env->curr_time += get_irn_etime(env, irn);
    }
    else {
        env->curr_time += exectime(env, irn);
    }
}
/**
 * Allocates memory and initializes trace scheduling environment.
 * @param arch_env  The backend architecture environment
 * @param irg       The graph to schedule
 * @return The environment
 */
static trace_env_t *trace_init(const arch_env_t *arch_env, ir_graph *irg) {
    trace_env_t *env = xcalloc(1, sizeof(*env));
    int         nn   = get_irg_last_idx(irg);

    env->arch_env   = arch_env;
    env->curr_time  = 0;
    env->sched_info = NEW_ARR_F(trace_irn_t, nn);
    env->liveness   = be_liveness(irg);
    FIRM_DBG_REGISTER(env->dbg, "firm.be.sched.trace");

    memset(env->sched_info, 0, nn * sizeof(*(env->sched_info)));

    return env;
}
/**
 * Frees all memory allocated for trace scheduling environment.
 * @param data  The environment
 */
static void trace_free(void *data) {
    trace_env_t *env = data;
    be_liveness_free(env->liveness);
    DEL_ARR_F(env->sched_info);
    free(env);
}
/**
 * Simple selector. Just assure that jumps are scheduled last.
 */
static ir_node *basic_selection(const arch_env_t *arch_env, ir_nodeset_t *ready_set) {
    ir_node *irn = NULL;
    ir_nodeset_iterator_t iter;

    /* assure that branches and constants are executed last */
    foreach_ir_nodeset(ready_set, irn, iter) {
        if (! arch_irn_class_is(arch_env, irn, branch)) {
            return irn;
        }
    }

    /* at last: schedule branches */
    irn = get_nodeset_node(ready_set);

    return irn;
}
/**
 * The muchnik selector.
 */
static ir_node *muchnik_select(void *block_env, ir_nodeset_t *ready_set, ir_nodeset_t *live_set)
{
    trace_env_t *env = block_env;
    ir_nodeset_t mcands, ecands;
    ir_nodeset_iterator_t iter;
    sched_timestep_t max_delay = 0;
    ir_node *irn;

    /* calculate the max delay of all candidates */
    foreach_ir_nodeset(ready_set, irn, iter) {
        sched_timestep_t d = get_irn_delay(env, irn);

        max_delay = d > max_delay ? d : max_delay;
    }

    ir_nodeset_init_size(&mcands, 8);
    ir_nodeset_init_size(&ecands, 8);
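    /* Muchnik's selection works on two candidate sets: mcands holds the
     * ready nodes with maximum delay (the most critical ones), ecands
     * the subset of mcands whose earliest time has already been
     * reached, i.e. which can issue without stalling. */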
    /* build mcands and ecands */
    foreach_ir_nodeset(ready_set, irn, iter) {
        if (get_irn_delay(env, irn) == max_delay) {
            ir_nodeset_insert(&mcands, irn);
            if (get_irn_etime(env, irn) <= env->curr_time)
                ir_nodeset_insert(&ecands, irn);
        }
    }

    /* select a node */
    if (ir_nodeset_size(&mcands) == 1) {
        irn = get_nodeset_node(&mcands);
        DB((env->dbg, LEVEL_3, "\tirn = %+F, mcand = 1, max_delay = %u\n", irn, max_delay));
    }
    else {
        int cnt = ir_nodeset_size(&ecands);
        if (cnt == 1) {
            irn = get_nodeset_node(&ecands);

            if (arch_irn_class_is(env->arch_env, irn, branch)) {
                /* BEWARE: don't select a JUMP if others are still possible */
                goto force_mcands;
            }
            DB((env->dbg, LEVEL_3, "\tirn = %+F, ecand = 1, max_delay = %u\n", irn, max_delay));
        }
        else if (cnt > 1) {
            DB((env->dbg, LEVEL_3, "\tecand = %d, max_delay = %u\n", cnt, max_delay));
            irn = basic_selection(env->arch_env, &ecands);
        }
        else {
force_mcands:
            DB((env->dbg, LEVEL_3, "\tmcand = %d\n", ir_nodeset_size(&mcands)));
            irn = basic_selection(env->arch_env, &mcands);
        }
    }

    ir_nodeset_destroy(&mcands);
    ir_nodeset_destroy(&ecands);

    return irn;
}
static void *muchnik_init_graph(const list_sched_selector_t *vtab, const arch_env_t *arch_env, ir_graph *irg)
{
    trace_env_t *env  = trace_init(arch_env, irg);
    env->selector     = vtab;
    env->selector_env = (void*) arch_env;
    return (void*) env;
}
static void *muchnik_init_block(void *graph_env, ir_node *bl)
{
    trace_preprocess_block(graph_env, bl);
    return graph_env;
}
static const list_sched_selector_t muchnik_selector_struct = {
    muchnik_init_graph,
    muchnik_init_block,
    muchnik_select,
    NULL,                /* to_appear_in_schedule */
    trace_node_ready,    /* node_ready */
    trace_update_time,   /* node_selected */
    NULL,                /* exectime */
    NULL,                /* latency */
    NULL,                /* finish_block */
    trace_free           /* finish_graph */
};

const list_sched_selector_t *muchnik_selector = &muchnik_selector_struct;
/**
 * Execute the heuristic function.
 */
static ir_node *heuristic_select(void *block_env, ir_nodeset_t *ns, ir_nodeset_t *lv)
{
    trace_env_t *trace_env   = block_env;
    ir_node     *irn, *cand  = NULL;
    int         max_prio     = INT_MIN;
    int         cur_prio     = INT_MIN;
    int         cur_pressure = ir_nodeset_size(lv);
    int         reg_fact, cand_reg_fact;
    ir_nodeset_iterator_t iter;

    /* prefer instructions which can be scheduled early */
#define PRIO_TIME        3
    /* prefer instructions with lots of successors */
#define PRIO_NUMSUCCS    8
    /* prefer instructions with long critical path */
#define PRIO_LEVEL      12
    /* prefer instructions coming early in preorder */
#define PRIO_PREORD      8
    /* weight of current register pressure */
#define PRIO_CUR_PRESS  20
    /* weight of register pressure difference */
#define PRIO_CHG_PRESS   8
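    /* The priority of a candidate is essentially (cf. the loop below):
     *   prio(n) = (critical_path_len(n) << PRIO_LEVEL)
     *           + (num_user(n)          << PRIO_NUMSUCCS)
     *           - (etime(n)             << PRIO_TIME)
     *           - reg_fact(n)
     *           + (preorder(n)          << PRIO_PREORD)
     * where reg_fact scales the register pressure change of n by the
     * current pressure; the highest-priority candidate is selected. */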
    /* priority based selection, heuristic inspired by Mueller's diss */
    foreach_ir_nodeset(ns, irn, iter) {
        /* make sure that branches are scheduled last */
        if (! arch_irn_class_is(trace_env->arch_env, irn, branch)) {
            int rdiff = get_irn_reg_diff(trace_env, irn);
            int sign  = rdiff < 0;
            int chg   = (rdiff < 0 ? -rdiff : rdiff) << PRIO_CHG_PRESS;

            //reg_fact = chg << cur_pressure;
            reg_fact = chg * cur_pressure;
            if (reg_fact < chg)       /* overflow check */
                reg_fact = INT_MAX - 2;
            reg_fact = sign ? -reg_fact : reg_fact;
            cur_prio = (get_irn_critical_path_len(trace_env, irn) << PRIO_LEVEL)
                //- (get_irn_delay(trace_env, irn) << PRIO_LEVEL)
                + (get_irn_num_user(trace_env, irn) << PRIO_NUMSUCCS)
                - (get_irn_etime(trace_env, irn) << PRIO_TIME)
                //- ((get_irn_reg_diff(trace_env, irn) >> PRIO_CHG_PRESS) << ((cur_pressure >> PRIO_CUR_PRESS) - 3))
                - reg_fact
                + (get_irn_preorder(trace_env, irn) << PRIO_PREORD); /* high preorder means early schedule */

            if (cur_prio > max_prio) {
                cand          = irn;
                max_prio      = cur_prio;
                cand_reg_fact = reg_fact;
            }
            DBG((trace_env->dbg, LEVEL_4, "checked NODE %+F\n", irn));
            DBG((trace_env->dbg, LEVEL_4, "\tpriority: %d\n", cur_prio));
            DBG((trace_env->dbg, LEVEL_4, "\tpath len: %d (%d)\n", get_irn_critical_path_len(trace_env, irn), get_irn_critical_path_len(trace_env, irn) << PRIO_LEVEL));
            DBG((trace_env->dbg, LEVEL_4, "\tdelay: %d (%d)\n", get_irn_delay(trace_env, irn), get_irn_delay(trace_env, irn) << PRIO_LEVEL));
            DBG((trace_env->dbg, LEVEL_4, "\t#user: %d (%d)\n", get_irn_num_user(trace_env, irn), get_irn_num_user(trace_env, irn) << PRIO_NUMSUCCS));
            DBG((trace_env->dbg, LEVEL_4, "\tetime: %d (%d)\n", get_irn_etime(trace_env, irn), 0 - (get_irn_etime(trace_env, irn) << PRIO_TIME)));
            DBG((trace_env->dbg, LEVEL_4, "\tpreorder: %d (%d)\n", get_irn_preorder(trace_env, irn), get_irn_preorder(trace_env, irn) << PRIO_PREORD));
            DBG((trace_env->dbg, LEVEL_4, "\treg diff: %d (%d)\n", get_irn_reg_diff(trace_env, irn), 0 - reg_fact));
            DBG((trace_env->dbg, LEVEL_4, "\tpressure: %d\n", cur_pressure));
        }
    }
    if (cand) {
        DBG((trace_env->dbg, LEVEL_4, "heuristic selected %+F:\n", cand));
    }
    else {
        cand = basic_selection(trace_env->arch_env, ns);
    }

    return cand;
}
static const list_sched_selector_t heuristic_selector_struct = {
    muchnik_init_graph,
    muchnik_init_block,
    heuristic_select,
    NULL,                /* to_appear_in_schedule */
    trace_node_ready,    /* node_ready */
    trace_update_time,   /* node_selected */
    NULL,                /* exectime */
    NULL,                /* latency */
    NULL,                /* finish_block */
    trace_free           /* finish_graph */
};

const list_sched_selector_t *heuristic_selector = &heuristic_selector_struct;