2 * Implements a trace scheduler as presented in Muchnick[TM].
3 * Originally implemented by Michael Beck.
4 * @author Christian Wuerdig
11 #include "iredges_t.h"
13 #include "besched_t.h"
14 #include "belistsched.h"
17 /* we need a special mark */
21 typedef struct _trace_irn {
22 sched_timestep_t delay; /**< The delay for this node if already calculated, else 0. */
23 sched_timestep_t etime; /**< The earliest time of this node. */
24 unsigned num_user; /**< The number real users (mode datab) of this node */
25 int reg_diff; /**< The difference of num(out registers) - num(in registers) */
26 int preorder; /**< The pre-order position */
27 unsigned critical_path_len; /**< The weighted length of the longest critical path */
28 unsigned is_root : 1; /**< is a root node of a block */
31 typedef struct _trace_env {
32 trace_irn_t *sched_info; /**< trace scheduling information about the nodes */
33 const arch_env_t *arch_env; /**< the arch environment */
34 sched_timestep_t curr_time; /**< current time of the scheduler */
35 void *selector_env; /**< the backend selector environment */
36 const list_sched_selector_t *selector; /**< the actual backend selector */
37 DEBUG_ONLY(firm_dbg_module_t *dbg;)
41 * Returns non-zero if the node is a root node
43 static INLINE unsigned is_root_node(trace_env_t *env, ir_node *n)
45 int idx = get_irn_idx(n);
47 assert(idx < ARR_LEN(env->sched_info));
48 return env->sched_info[idx].is_root;
52 * Mark a node as root node
54 static INLINE void mark_root_node(trace_env_t *env, ir_node *n)
56 int idx = get_irn_idx(n);
58 assert(idx < ARR_LEN(env->sched_info));
59 env->sched_info[idx].is_root = 1;
63 * Get the current delay.
65 static INLINE sched_timestep_t get_irn_delay(trace_env_t *env, ir_node *n) {
66 int idx = get_irn_idx(n);
68 assert(idx < ARR_LEN(env->sched_info));
69 return env->sched_info[idx].delay;
73 * Set the current delay.
75 static INLINE void set_irn_delay(trace_env_t *env, ir_node *n, sched_timestep_t delay) {
76 int idx = get_irn_idx(n);
78 assert(idx < ARR_LEN(env->sched_info));
79 env->sched_info[idx].delay = delay;
83 * Get the current etime.
85 static INLINE sched_timestep_t get_irn_etime(trace_env_t *env, ir_node *n) {
86 int idx = get_irn_idx(n);
88 assert(idx < ARR_LEN(env->sched_info));
89 return env->sched_info[idx].etime;
93 * Set the current etime.
95 static INLINE void set_irn_etime(trace_env_t *env, ir_node *n, sched_timestep_t etime) {
96 int idx = get_irn_idx(n);
98 assert(idx < ARR_LEN(env->sched_info));
99 env->sched_info[idx].etime = etime;
103 * Get the number of users.
105 static INLINE unsigned get_irn_num_user(trace_env_t *env, ir_node *n) {
106 int idx = get_irn_idx(n);
108 assert(idx < ARR_LEN(env->sched_info));
109 return env->sched_info[idx].num_user;
113 * Set the number of users.
115 static INLINE void set_irn_num_user(trace_env_t *env, ir_node *n, unsigned num_user) {
116 int idx = get_irn_idx(n);
118 assert(idx < ARR_LEN(env->sched_info));
119 env->sched_info[idx].num_user = num_user;
123 * Get the register difference.
125 static INLINE int get_irn_reg_diff(trace_env_t *env, ir_node *n) {
126 int idx = get_irn_idx(n);
128 assert(idx < ARR_LEN(env->sched_info));
129 return env->sched_info[idx].reg_diff;
133 * Set the register difference.
135 static INLINE void set_irn_reg_diff(trace_env_t *env, ir_node *n, int reg_diff) {
136 int idx = get_irn_idx(n);
138 assert(idx < ARR_LEN(env->sched_info));
139 env->sched_info[idx].reg_diff = reg_diff;
143 * Get the pre-order position.
145 static INLINE int get_irn_preorder(trace_env_t *env, ir_node *n) {
146 int idx = get_irn_idx(n);
148 assert(idx < ARR_LEN(env->sched_info));
149 return env->sched_info[idx].preorder;
153 * Set the pre-order position.
155 static INLINE void set_irn_preorder(trace_env_t *env, ir_node *n, int pos) {
156 int idx = get_irn_idx(n);
158 assert(idx < ARR_LEN(env->sched_info));
159 env->sched_info[idx].preorder = pos;
163 * Get the pre-order position.
165 static INLINE unsigned get_irn_critical_path_len(trace_env_t *env, ir_node *n) {
166 int idx = get_irn_idx(n);
168 assert(idx < ARR_LEN(env->sched_info));
169 return env->sched_info[idx].critical_path_len;
173 * Set the pre-order position.
175 static INLINE void set_irn_critical_path_len(trace_env_t *env, ir_node *n, unsigned len) {
176 int idx = get_irn_idx(n);
178 assert(idx < ARR_LEN(env->sched_info));
179 env->sched_info[idx].critical_path_len = len;
183 * returns the exec-time for node n.
185 static sched_timestep_t exectime(trace_env_t *env, ir_node *n) {
186 if (be_is_Keep(n) || is_Proj(n))
188 if (env->selector->exectime)
189 return env->selector->exectime(env->arch_env, n);
194 * Calculates the latency between two ops.
196 static sched_timestep_t latency(trace_env_t *env, ir_node *pred, int pred_cycle, ir_node *curr, int curr_cycle) {
197 /* a Keep hides a root */
198 if (be_is_Keep(curr))
199 return exectime(env, pred);
201 /* Proj's are executed immediately */
205 /* predecessors Proj's must be skipped */
207 pred = get_Proj_pred(pred);
209 if (env->selector->latency)
210 return env->selector->latency(env->arch_env, pred, pred_cycle, curr, curr_cycle);
215 * Returns the number of users of a node having mode datab.
217 static int get_num_successors(ir_node *irn) {
219 const ir_edge_t *edge;
221 if (get_irn_mode(irn) == mode_T) {
222 /* for mode_T nodes: count the users of all Projs */
223 foreach_out_edge(irn, edge) {
224 ir_node *proj = get_edge_src_irn(edge);
225 ir_mode *mode = get_irn_mode(proj);
228 sum += get_num_successors(proj);
229 else if (mode_is_datab(mode))
230 sum += get_irn_n_edges(proj);
234 /* do not count keep-alive edges */
235 foreach_out_edge(irn, edge) {
236 if (get_irn_opcode(get_edge_src_irn(edge)) != iro_End)
245 * Returns the difference of regs_output - regs_input;
247 static int get_reg_difference(trace_env_t *env, ir_node *irn) {
252 if (get_irn_mode(irn) == mode_T) {
253 /* mode_T nodes: num out regs == num Projs with mode datab */
254 const ir_edge_t *edge;
255 foreach_out_edge(irn, edge) {
256 ir_node *proj = get_edge_src_irn(edge);
257 if (mode_is_datab(get_irn_mode(proj)))
264 /* num in regs: number of ins with mode datab and not ignore */
265 for (i = get_irn_arity(irn) - 1; i >= 0; i--) {
266 ir_node *in = get_irn_n(irn, i);
267 if (mode_is_datab(get_irn_mode(in)) && ! arch_irn_is(env->arch_env, in, ignore))
271 return num_out - num_in;
275 * Descend into a DAG and create a pre-order list.
277 static void descent(ir_node *root, ir_node *block, ir_node **list, trace_env_t *env, unsigned path_len) {
280 if (! is_Phi(root)) {
281 path_len += exectime(env, root);
282 if (get_irn_critical_path_len(env, root) < path_len) {
283 set_irn_critical_path_len(env, root, path_len);
286 /* Phi nodes always leave the block */
287 for (i = get_irn_arity(root) - 1; i >= 0; --i) {
288 ir_node *pred = get_irn_n(root, i);
290 DBG((env->dbg, LEVEL_3, " node %+F\n", pred));
291 /* Blocks may happen as predecessors of End nodes */
295 /* already seen nodes are not marked */
296 if (get_irn_link(pred) != MARK)
299 /* don't leave our block */
300 if (get_nodes_block(pred) != block)
303 /* calculate number of users (needed for heuristic) */
304 set_irn_num_user(env, root, get_num_successors(root));
306 /* calculate register difference (needed for heuristic) */
307 set_irn_reg_diff(env, root, get_reg_difference(env, root));
309 set_irn_link(pred, NULL);
311 descent(pred, block, list, env, path_len);
314 set_irn_link(root, *list);
319 * Returns non-zero if root is a root in the block block.
321 static int is_root(ir_node *root, ir_node *block) {
322 const ir_edge_t *edge;
324 foreach_out_edge(root, edge) {
325 ir_node *succ = get_edge_src_irn(edge);
329 /* Phi nodes are always in "another" block */
332 if (get_nodes_block(succ) == block)
339 * Performs initial block calculations for trace scheduling.
341 static void trace_preprocess_block(trace_env_t *env, ir_node *block) {
342 ir_node *root = NULL, *preord = NULL;
345 const ir_edge_t *edge;
347 /* First step: Find the root set. */
348 foreach_out_edge(block, edge) {
349 ir_node *succ = get_edge_src_irn(edge);
351 if (is_root(succ, block)) {
352 mark_root_node(env, succ);
353 set_irn_link(succ, root);
357 set_irn_link(succ, MARK);
360 /* Second step: calculate the pre-order list. */
362 for (curr = root; curr; curr = irn) {
363 irn = get_irn_link(curr);
364 DBG((env->dbg, LEVEL_2, " DAG root %+F\n", curr));
365 descent(curr, block, &preord, env, 0);
369 /* Third step: calculate the Delay. Note that our
370 * list is now in pre-order, starting at root
372 for (cur_pos = 0, curr = root; curr; curr = get_irn_link(curr), cur_pos++) {
375 if (arch_irn_class_is(env->arch_env, curr, branch)) {
376 /* assure, that branches can be executed last */
380 if (is_root_node(env, curr))
381 d = exectime(env, curr);
384 foreach_out_edge(curr, edge) {
385 ir_node *n = get_edge_src_irn(edge);
387 if (get_nodes_block(n) == block) {
390 ld = latency(env, curr, 1, n, 0) + get_irn_delay(env, n);
396 set_irn_delay(env, curr, d);
397 DB((env->dbg, LEVEL_2, "\t%+F delay %u\n", curr, d));
399 /* set the etime of all nodes to 0 */
400 set_irn_etime(env, curr, 0);
402 set_irn_preorder(env, curr, cur_pos);
407 * This function gets called after a node has finally been made ready.
409 static void trace_node_ready(trace_env_t *env, ir_node *irn, ir_node *pred) {
410 sched_timestep_t etime_p, etime;
412 etime = env->curr_time;
414 etime_p = get_irn_etime(env, pred);
415 etime += latency(env, pred, 1, irn, 0);
416 etime = etime_p > etime ? etime_p : etime;
419 set_irn_etime(env, irn, etime);
420 DB((env->dbg, LEVEL_2, "\tset etime of %+F to %u\n", irn, etime));
424 * Update the current time after irn has been selected.
426 static void trace_update_time(trace_env_t *env, ir_node *irn) {
427 if (is_Phi(irn) || get_irn_opcode(irn) == iro_Start) {
428 env->curr_time += get_irn_etime(env, irn);
431 env->curr_time += exectime(env, irn);
436 * Allocates memory and initializes trace scheduling environment.
437 * @param arch_env The arch environment
 * @param irg The graph; its last node index sizes the per-node info array
438 * @return The environment
440 static trace_env_t *trace_init(const arch_env_t *arch_env, ir_graph *irg) {
441 trace_env_t *env = xcalloc(1, sizeof(*env));
442 int nn = get_irg_last_idx(irg);
444 env->arch_env = arch_env;
446 env->sched_info = NEW_ARR_F(trace_irn_t, nn);
447 FIRM_DBG_REGISTER(env->dbg, "firm.be.sched.trace");
449 memset(env->sched_info, 0, nn * sizeof(*(env->sched_info)));
455 * Frees all memory allocated for trace scheduling environment.
456 * @param env The environment
458 static void trace_free(trace_env_t *env) {
459 DEL_ARR_F(env->sched_info);
464 * Simple selector. Just ensure that jumps are scheduled last.
466 static ir_node *basic_selection(const arch_env_t *arch_env, nodeset *ready_set) {
469 /* assure that branches and constants are executed last */
470 for (irn = nodeset_first(ready_set); irn; irn = nodeset_next(ready_set)) {
471 if (! arch_irn_class_is(arch_env, irn, branch)) {
472 nodeset_break(ready_set);
477 /* at last: schedule branches */
478 irn = nodeset_first(ready_set);
479 nodeset_break(ready_set);
485 * The muchnik selector.
487 static ir_node *muchnik_select(void *block_env, nodeset *ready_set, nodeset *live_set)
489 trace_env_t *env = block_env;
490 nodeset *mcands, *ecands;
491 sched_timestep_t max_delay = 0;
494 /* calculate the max delay of all candidates */
495 foreach_nodeset(ready_set, irn) {
496 sched_timestep_t d = get_irn_delay(env, irn);
498 max_delay = d > max_delay ? d : max_delay;
501 mcands = new_nodeset(8);
502 ecands = new_nodeset(8);
504 /* build mcands and ecands */
505 foreach_nodeset(ready_set, irn) {
506 if (get_irn_delay(env, irn) == max_delay) {
507 nodeset_insert(mcands, irn);
508 if (get_irn_etime(env, irn) <= env->curr_time)
509 nodeset_insert(ecands, irn);
514 if (nodeset_count(mcands) == 1) {
515 irn = nodeset_first(mcands);
516 DB((env->dbg, LEVEL_3, "\tirn = %+F, mcand = 1, max_delay = %u\n", irn, max_delay));
519 int cnt = nodeset_count(ecands);
521 irn = nodeset_first(ecands);
523 if (arch_irn_class_is(env->arch_env, irn, branch)) {
524 /* BEWARE: don't select a JUMP if others are still possible */
527 DB((env->dbg, LEVEL_3, "\tirn = %+F, ecand = 1, max_delay = %u\n", irn, max_delay));
530 DB((env->dbg, LEVEL_3, "\tecand = %d, max_delay = %u\n", cnt, max_delay));
531 irn = basic_selection(env->arch_env, ecands);
535 DB((env->dbg, LEVEL_3, "\tmcand = %d\n", nodeset_count(mcands)));
536 irn = basic_selection(env->arch_env, mcands);
543 static void *muchnik_init_graph(const list_sched_selector_t *vtab, const arch_env_t *arch_env, ir_graph *irg)
545 trace_env_t *env = trace_init(arch_env, irg);
546 env->selector = vtab;
547 env->selector_env = env;
551 static void *muchnik_init_block(void *graph_env, ir_node *bl)
553 trace_preprocess_block(graph_env, bl);
557 static const list_sched_selector_t muchnik_selector_struct = {
561 NULL, /* to_appear_in_schedule */
562 trace_node_ready, /* node_ready */
563 trace_update_time, /* node_selected */
566 NULL, /* finish_block */
567 trace_free /* finish_graph */
570 const list_sched_selector_t *muchnik_selector = &muchnik_selector_struct;
573 * Execute the heuristic function.
575 static ir_node *heuristic_select(void *block_env, nodeset *ns, nodeset *lv)
577 trace_env_t *trace_env = block_env;
578 ir_node *irn, *cand = NULL;
579 int max_prio = INT_MIN;
580 int cur_prio = INT_MIN;
581 int cur_pressure = nodeset_count(lv);
582 int reg_fact, cand_reg_fact;
584 /* prefer instructions which can be scheduled early */
586 /* prefer instructions with lots of successors */
587 #define PRIO_NUMSUCCS 8
588 /* prefer instructions with long critical path */
589 #define PRIO_LEVEL 12
590 /* prefer instructions coming early in preorder */
591 #define PRIO_PREORD 8
592 /* weight of current register pressure */
593 #define PRIO_CUR_PRESS 20
594 /* weight of register pressure difference */
595 #define PRIO_CHG_PRESS 8
597 /* priority based selection, heuristic inspired by mueller diss */
598 foreach_nodeset(ns, irn) {
599 /* make sure that branches are scheduled last */
600 if (! arch_irn_class_is(trace_env->arch_env, irn, branch)) {
601 int rdiff = get_irn_reg_diff(trace_env, irn);
602 int sign = rdiff < 0;
603 int chg = (rdiff < 0 ? -rdiff : rdiff) << PRIO_CHG_PRESS;
605 reg_fact = chg << cur_pressure;
607 reg_fact = INT_MAX - 2;
608 reg_fact = sign ? -reg_fact : reg_fact;
610 cur_prio = (get_irn_critical_path_len(trace_env, irn) << PRIO_LEVEL)
611 //- (get_irn_delay(trace_env, irn) << PRIO_LEVEL)
612 + (get_irn_num_user(trace_env, irn) << PRIO_NUMSUCCS)
613 - (get_irn_etime(trace_env, irn) << PRIO_TIME)
614 //- ((get_irn_reg_diff(trace_env, irn) >> PRIO_CHG_PRESS) << ((cur_pressure >> PRIO_CUR_PRESS) - 3))
616 + (get_irn_preorder(trace_env, irn) << PRIO_PREORD); /* high preorder means early schedule */
617 if (cur_prio > max_prio) {
620 cand_reg_fact = reg_fact;
623 DBG((trace_env->dbg, LEVEL_4, "checked NODE %+F\n", irn));
624 DBG((trace_env->dbg, LEVEL_4, "\tpriority: %d\n", cur_prio));
625 DBG((trace_env->dbg, LEVEL_4, "\tpath len: %d (%d)\n", get_irn_critical_path_len(trace_env, irn), get_irn_critical_path_len(trace_env, irn) << PRIO_LEVEL));
626 DBG((trace_env->dbg, LEVEL_4, "\tdelay: %d (%d)\n", get_irn_delay(trace_env, irn), get_irn_delay(trace_env, irn) << PRIO_LEVEL));
627 DBG((trace_env->dbg, LEVEL_4, "\t#user: %d (%d)\n", get_irn_num_user(trace_env, irn), get_irn_num_user(trace_env, irn) << PRIO_NUMSUCCS));
628 DBG((trace_env->dbg, LEVEL_4, "\tetime: %d (%d)\n", get_irn_etime(trace_env, irn), 0 - (get_irn_etime(trace_env, irn) << PRIO_TIME)));
629 DBG((trace_env->dbg, LEVEL_4, "\tpreorder: %d (%d)\n", get_irn_preorder(trace_env, irn), get_irn_preorder(trace_env, irn) << PRIO_PREORD));
630 DBG((trace_env->dbg, LEVEL_4, "\treg diff: %d (%d)\n", get_irn_reg_diff(trace_env, irn), 0 - cand_reg_fact));
631 DBG((trace_env->dbg, LEVEL_4, "\tpressure: %d\n", cur_pressure));
636 DBG((trace_env->dbg, LEVEL_4, "heuristic selected %+F:\n", cand));
639 cand = basic_selection(trace_env->arch_env, ns);
645 static const list_sched_selector_t heuristic_selector_struct = {
649 NULL, /* to_appear_in_schedule */
650 trace_node_ready, /* node_ready */
651 trace_update_time, /* node_selected */
654 NULL, /* finish_block */
655 trace_free /* finish_graph */
658 const list_sched_selector_t *heuristic_selector = &heuristic_selector_struct;