2 * This file is part of libFirm.
3 * Copyright (C) 2012 University of Karlsruhe.
8 * @brief Implements a trace scheduler as presented in Muchnik[TM].
16 #include "iredges_t.h"
19 #include "belistsched.h"
24 /* we need a special mark */
/** Per-node scheduling data; the accessors below index an array of these by node index. */
28 typedef struct trace_irn {
29 sched_timestep_t delay; /**< The delay for this node if already calculated, else 0. */
30 sched_timestep_t etime; /**< The earliest time of this node. */
31 unsigned num_user; /**< The number of real users (mode datab) of this node */
32 int reg_diff; /**< The difference num(out registers) - num(in registers) */
33 int preorder; /**< The pre-order position */
34 unsigned critical_path_len; /**< The weighted length of the longest critical path */
35 unsigned is_root : 1; /**< Set if the node is a root node of a block */
/** State shared by the trace scheduler callbacks: per-node data, scheduler clock and liveness. */
38 typedef struct trace_env {
39 trace_irn_t *sched_info; /**< trace scheduling information about the nodes, indexed by node index */
40 sched_timestep_t curr_time; /**< current time of the scheduler */
41 be_lv_t *liveness; /**< The liveness for the irg */
42 DEBUG_ONLY(firm_dbg_module_t *dbg;) /**< debug module, registered as "firm.be.sched.trace" */
46 * Returns a random node from a nodeset
48 static ir_node *get_nodeset_node(const ir_nodeset_t *nodeset)
50 return ir_nodeset_first(nodeset);
54 * Returns non-zero if the node is a root node
56 static inline unsigned is_root_node(trace_env_t *env, ir_node *n)
58 unsigned const idx = get_irn_idx(n);
60 assert(idx < ARR_LEN(env->sched_info));
61 return env->sched_info[idx].is_root;
65 * Mark a node as root node
67 static inline void mark_root_node(trace_env_t *env, ir_node *n)
69 unsigned const idx = get_irn_idx(n);
71 assert(idx < ARR_LEN(env->sched_info));
72 env->sched_info[idx].is_root = 1;
76 * Get the current delay.
78 static inline sched_timestep_t get_irn_delay(trace_env_t *env, ir_node *n)
80 unsigned const idx = get_irn_idx(n);
82 assert(idx < ARR_LEN(env->sched_info));
83 return env->sched_info[idx].delay;
87 * Set the current delay.
89 static inline void set_irn_delay(trace_env_t *env, ir_node *n, sched_timestep_t delay)
91 unsigned const idx = get_irn_idx(n);
93 assert(idx < ARR_LEN(env->sched_info));
94 env->sched_info[idx].delay = delay;
98 * Get the current etime.
100 static inline sched_timestep_t get_irn_etime(trace_env_t *env, ir_node *n)
102 unsigned const idx = get_irn_idx(n);
104 assert(idx < ARR_LEN(env->sched_info));
105 return env->sched_info[idx].etime;
109 * Set the current etime.
111 static inline void set_irn_etime(trace_env_t *env, ir_node *n, sched_timestep_t etime)
113 unsigned const idx = get_irn_idx(n);
115 assert(idx < ARR_LEN(env->sched_info));
116 env->sched_info[idx].etime = etime;
120 * Get the number of users.
122 static inline unsigned get_irn_num_user(trace_env_t *env, ir_node *n)
124 unsigned const idx = get_irn_idx(n);
126 assert(idx < ARR_LEN(env->sched_info));
127 return env->sched_info[idx].num_user;
131 * Set the number of users.
133 static inline void set_irn_num_user(trace_env_t *env, ir_node *n, unsigned num_user)
135 unsigned const idx = get_irn_idx(n);
137 assert(idx < ARR_LEN(env->sched_info));
138 env->sched_info[idx].num_user = num_user;
142 * Get the register difference.
144 static inline int get_irn_reg_diff(trace_env_t *env, ir_node *n)
146 unsigned const idx = get_irn_idx(n);
148 assert(idx < ARR_LEN(env->sched_info));
149 return env->sched_info[idx].reg_diff;
153 * Set the register difference.
155 static inline void set_irn_reg_diff(trace_env_t *env, ir_node *n, int reg_diff)
157 unsigned const idx = get_irn_idx(n);
159 assert(idx < ARR_LEN(env->sched_info));
160 env->sched_info[idx].reg_diff = reg_diff;
164 * Get the pre-order position.
166 static inline int get_irn_preorder(trace_env_t *env, ir_node *n)
168 unsigned const idx = get_irn_idx(n);
170 assert(idx < ARR_LEN(env->sched_info));
171 return env->sched_info[idx].preorder;
175 * Set the pre-order position.
177 static inline void set_irn_preorder(trace_env_t *env, ir_node *n, int pos)
179 unsigned const idx = get_irn_idx(n);
181 assert(idx < ARR_LEN(env->sched_info));
182 env->sched_info[idx].preorder = pos;
186 * Get the pre-order position.
188 static inline unsigned get_irn_critical_path_len(trace_env_t *env, ir_node *n)
190 unsigned const idx = get_irn_idx(n);
192 assert(idx < ARR_LEN(env->sched_info));
193 return env->sched_info[idx].critical_path_len;
197 * Set the pre-order position.
199 static inline void set_irn_critical_path_len(trace_env_t *env, ir_node *n, unsigned len)
201 unsigned const idx = get_irn_idx(n);
203 assert(idx < ARR_LEN(env->sched_info));
204 env->sched_info[idx].critical_path_len = len;
208 * returns the exec-time for node n.
/* Execution time estimate of node n, in scheduler time steps. */
210 static sched_timestep_t exectime(trace_env_t *env, ir_node *n)
/* Keep and Proj nodes are singled out and treated as one case */
213 if (be_is_Keep(n) || is_Proj(n))
219 * Calculates the latency between two ops
/* Latency from pred to curr in scheduler time steps; the outcome depends on the kind of curr. */
221 static sched_timestep_t latency(trace_env_t *env, ir_node *pred, int pred_cycle, ir_node *curr, int curr_cycle)
225 /* a Keep hides a root: charge the execution time of the predecessor */
226 if (be_is_Keep(curr))
227 return exectime(env, pred);
229 /* Proj's are executed immediately */
237 * Returns the number of users of a node having mode datab.
/* Counts the users of irn; mode_T nodes are counted through their Projs. */
239 static int get_num_successors(ir_node *irn)
243 if (get_irn_mode(irn) == mode_T) {
244 /* for mode_T nodes: count the users of all Projs */
245 foreach_out_edge(irn, edge) {
246 ir_node *proj = get_edge_src_irn(edge);
247 ir_mode *mode = get_irn_mode(proj);
/* one branch recurses into the Proj, the other adds the out edges of a datab Proj */
250 sum += get_num_successors(proj);
251 else if (mode_is_datab(mode))
252 sum += get_irn_n_edges(proj);
256 /* do not count keep-alive edges */
257 foreach_out_edge(irn, edge) {
/* users that are the End node are the keep-alive edges mentioned above */
258 if (get_irn_opcode(get_edge_src_irn(edge)) != iro_End)
267 * Returns the difference of regs_output - regs_input;
/*
 * Register difference of irn: number of registers counted as outputs
 * minus number of registers counted as inputs (num_out - num_in).
 */
269 static int get_reg_difference(trace_env_t *env, ir_node *irn)
274 ir_node *block = get_nodes_block(irn);
276 if (be_is_Call(irn)) {
277 /* we want calls preferred */
281 if (get_irn_mode(irn) == mode_T) {
282 /* mode_T nodes: num out regs == num Projs with mode datab */
283 foreach_out_edge(irn, edge) {
284 ir_node *proj = get_edge_src_irn(edge);
285 if (mode_is_datab(get_irn_mode(proj)))
292 /* num in regs: number of ins with mode datab and not ignore */
/* operands are visited from the last to the first */
293 for (i = get_irn_arity(irn) - 1; i >= 0; i--) {
294 ir_node *in = get_irn_n(irn, i);
296 if (!mode_is_datab(get_irn_mode(in)))
299 if (arch_irn_is_ignore(in))
/* liveness of the operand at the end of the block is consulted as well */
302 if (be_is_live_end(env->liveness, block, in))
308 return num_out - num_in;
312 * Descends into a DAG and creates a pre-order list.
/*
 * Recursive walk over the operands of root. For non-Phi nodes the critical
 * path length, user count and register difference are recorded in env.
 * Finally root's link field is pointed at the current head of *list.
 * path_len is the path length accumulated by the callers so far.
 */
314 static void descent(ir_node *root, ir_node *block, ir_node **list, trace_env_t *env, unsigned path_len)
318 if (! is_Phi(root)) {
319 path_len += exectime(env, root);
/* keep the longest path length seen for this node */
320 if (get_irn_critical_path_len(env, root) < path_len) {
321 set_irn_critical_path_len(env, root, path_len);
323 /* calculate number of users (needed for heuristic) */
324 set_irn_num_user(env, root, get_num_successors(root));
326 /* calculate register difference (needed for heuristic) */
327 set_irn_reg_diff(env, root, get_reg_difference(env, root));
329 /* Phi nodes always leave the block */
330 for (i = get_irn_arity(root) - 1; i >= 0; --i) {
331 ir_node *pred = get_irn_n(root, i);
333 DBG((env->dbg, LEVEL_3, " node %+F\n", pred));
335 /* Blocks may happen as predecessors of End nodes */
339 /* already seen nodes are not marked */
340 if (get_irn_link(pred) != MARK)
343 /* don't leave our block */
344 if (get_nodes_block(pred) != block)
/* drop the MARK before recursing into the operand */
347 set_irn_link(pred, NULL);
349 descent(pred, block, list, env, path_len);
/* chain root in front of the nodes collected so far */
352 set_irn_link(root, *list);
357 * Returns non-zero if root is a root in the block block.
/* Inspects every user of root and compares the block of that user with block. */
359 static int is_root(ir_node *root, ir_node *block)
361 foreach_out_edge(root, edge) {
362 ir_node *succ = get_edge_src_irn(edge);
366 /* Phi nodes are always in "another" block */
369 if (get_nodes_block(succ) == block)
376 * Performs initial block calculations for trace scheduling.
/*
 * Prepares the per-node data of one block in three steps: find the root
 * nodes, build a pre-order list with descent(), then walk that list and
 * store delay, etime (reset to 0) and pre-order position of each node.
 */
378 static void trace_preprocess_block(trace_env_t *env, ir_node *block)
380 ir_node *root = NULL, *preord = NULL;
384 /* First step: Find the root set. */
385 foreach_out_edge(block, edge) {
386 ir_node *succ = get_edge_src_irn(edge);
388 if (is_Anchor(succ)) {
389 /* ignore a keep alive edge */
392 if (is_root(succ, block)) {
393 mark_root_node(env, succ);
/* roots are chained through their link fields */
394 set_irn_link(succ, root);
/* MARK in the link field is what descent() tests for */
398 set_irn_link(succ, MARK);
401 /* Second step: calculate the pre-order list. */
/* the link of curr is read before descent(), which rewrites link fields */
403 for (curr = root; curr; curr = irn) {
404 irn = (ir_node*)get_irn_link(curr);
405 DBG((env->dbg, LEVEL_2, " DAG root %+F\n", curr));
406 descent(curr, block, &preord, env, 0);
410 /* Third step: calculate the Delay. Note that our
411 * list is now in pre-order, starting at root
413 for (cur_pos = 0, curr = root; curr; curr = (ir_node*)get_irn_link(curr), cur_pos++) {
417 /* assure, that branches can be executed last */
/* a root node starts with its own execution time */
421 if (is_root_node(env, curr))
422 d = exectime(env, curr);
425 foreach_out_edge(curr, edge) {
426 ir_node *n = get_edge_src_irn(edge);
428 if (get_nodes_block(n) == block) {
/* candidate delay through this user: latency to it plus its own delay */
431 ld = latency(env, curr, 1, n, 0) + get_irn_delay(env, n);
437 set_irn_delay(env, curr, d);
438 DB((env->dbg, LEVEL_2, "\t%+F delay %u\n", curr, d));
440 /* set the etime of all nodes to 0 */
441 set_irn_etime(env, curr, 0);
443 set_irn_preorder(env, curr, cur_pos);
448 * This function gets called after a node finally has been made ready.
/*
 * node_ready callback of both selector tables in this file.
 * data is the trace_env_t; the earliest time of irn is stored in it.
 */
450 static void trace_node_ready(void *data, ir_node *irn, ir_node *pred)
452 trace_env_t *env = (trace_env_t*)data;
453 sched_timestep_t etime_p, etime;
/* start from the current scheduler time */
455 etime = env->curr_time;
457 etime_p = get_irn_etime(env, pred);
458 etime += latency(env, pred, 1, irn, 0);
/* take the later of the earliest time of pred and current time plus latency */
459 etime = etime_p > etime ? etime_p : etime;
462 set_irn_etime(env, irn, etime);
463 DB((env->dbg, LEVEL_2, "\tset etime of %+F to %u\n", irn, etime));
467 * Update the current time after irn has been selected.
/*
 * node_selected callback of both selector tables in this file.
 * data is the trace_env_t whose curr_time is advanced.
 */
469 static void trace_update_time(void *data, ir_node *irn)
471 trace_env_t *env = (trace_env_t*)data;
/* Phi and Start nodes advance the clock by their earliest time */
472 if (is_Phi(irn) || get_irn_opcode(irn) == beo_Start) {
473 env->curr_time += get_irn_etime(env, irn);
/* the other update advances the clock by the execution time of the node */
476 env->curr_time += exectime(env, irn);
481 * Allocates memory and initializes trace scheduling environment.
482 * @param irg The backend irg object
483 * @return The environment
/* Builds the trace environment for irg: per-node array, liveness handle, debug module. */
485 static trace_env_t *trace_init(ir_graph *irg)
487 trace_env_t *env = XMALLOCZ(trace_env_t);
/* NOTE(review): nn is an int and is passed on as an array length; confirm the return type of get_irg_last_idx() */
488 int nn = get_irg_last_idx(irg);
/* one trace_irn_t entry per node index of the graph */
491 env->sched_info = NEW_ARR_FZ(trace_irn_t, nn);
492 env->liveness = be_get_irg_liveness(irg);
493 FIRM_DBG_REGISTER(env->dbg, "firm.be.sched.trace");
495 be_assure_live_chk(irg);
501 * Frees all memory allocated for trace scheduling environment.
502 * @param env The environment
/* finish_graph callback of both selector tables in this file; data is the trace_env_t. */
504 static void trace_free(void *data)
506 trace_env_t *env = (trace_env_t*)data;
/* release the per-node array created in trace_init() */
507 DEL_ARR_F(env->sched_info);
512 * Simple selector. Just assure that jumps are scheduled last.
/* Fallback choice from ready_set, used by muchnik_select() and heuristic_select(). */
514 static ir_node *basic_selection(ir_nodeset_t *ready_set)
516 /* assure that branches and constants are executed last */
517 foreach_ir_nodeset(ready_set, irn, iter) {
523 /* at last: schedule branches */
/* reached when the loop above did not settle on a node: take any node of the set */
524 return get_nodeset_node(ready_set);
528 * The muchnik selector.
/*
 * Selection callback of the Muchnik scheduler; block_env is the trace_env_t.
 * mcands collects the ready nodes with maximal delay. ecands collects those
 * of them whose earliest time is not later than the current scheduler time.
 */
530 static ir_node *muchnik_select(void *block_env, ir_nodeset_t *ready_set)
532 trace_env_t *env = (trace_env_t*)block_env;
533 ir_nodeset_t mcands, ecands;
534 sched_timestep_t max_delay = 0;
536 /* calculate the max delay of all candidates */
537 foreach_ir_nodeset(ready_set, irn, iter) {
538 sched_timestep_t d = get_irn_delay(env, irn);
540 max_delay = d > max_delay ? d : max_delay;
543 ir_nodeset_init_size(&mcands, 8);
544 ir_nodeset_init_size(&ecands, 8);
546 /* build mcands and ecands */
547 foreach_ir_nodeset(ready_set, irn, iter) {
548 if (get_irn_delay(env, irn) == max_delay) {
549 ir_nodeset_insert(&mcands, irn);
550 if (get_irn_etime(env, irn) <= env->curr_time)
551 ir_nodeset_insert(&ecands, irn);
/* a single node with maximal delay is taken directly */
557 if (ir_nodeset_size(&mcands) == 1) {
558 irn = get_nodeset_node(&mcands);
559 DB((env->dbg, LEVEL_3, "\tirn = %+F, mcand = 1, max_delay = %u\n", irn, max_delay));
562 size_t cnt = ir_nodeset_size(&ecands);
564 irn = get_nodeset_node(&ecands);
567 /* BEWARE: don't select a JUMP if others are still possible */
570 DB((env->dbg, LEVEL_3, "\tirn = %+F, ecand = 1, max_delay = %u\n", irn, max_delay));
573 DB((env->dbg, LEVEL_3, "\tecand = %zu, max_delay = %u\n", cnt, max_delay));
574 irn = basic_selection(&ecands);
578 DB((env->dbg, LEVEL_3, "\tmcand = %zu\n", ir_nodeset_size(&mcands)));
579 irn = basic_selection(&mcands);
/* Graph-level setup for the trace schedulers: creates the trace environment for irg. */
586 static void *muchnik_init_graph(ir_graph *irg)
588 trace_env_t *env = trace_init(irg);
/* Block-level setup: graph_env is the trace_env_t, bl the block that gets preprocessed. */
592 static void *muchnik_init_block(void *graph_env, ir_node *bl)
594 trace_env_t *env = (trace_env_t*) graph_env;
595 trace_preprocess_block(env, bl);
/* Runs the list scheduler on irg with the Muchnik selector table. */
599 static void sched_muchnik(ir_graph *irg)
601 static const list_sched_selector_t muchnik_selector = {
605 trace_node_ready, /* node_ready */
606 trace_update_time, /* node_selected */
607 NULL, /* finish_block */
608 trace_free /* finish_graph */
610 be_list_sched_graph(irg, &muchnik_selector);
614 * Execute the heuristic function.
/*
 * Selection callback of the "heur" scheduler; block_env is the trace_env_t.
 * Every node of ns gets a priority built from weighted per-node values.
 * basic_selection() serves as fallback.
 */
616 static ir_node *heuristic_select(void *block_env, ir_nodeset_t *ns)
618 trace_env_t *trace_env = (trace_env_t*)block_env;
619 ir_node *cand = NULL;
620 int max_prio = INT_MIN;
621 int cur_prio = INT_MIN;
623 /* Note: register pressure calculation needs an overhaul, you need correct
624 * tracking for each register class individually and weight by each class
625 int cur_pressure = ir_nodeset_size(lv); */
626 int cur_pressure = 1;
/* the PRIO_* constants are shift amounts applied to the individual terms below */
628 /* prefer instructions which can be scheduled early */
630 /* prefer instructions with lots of successors */
631 #define PRIO_NUMSUCCS 8
632 /* prefer instructions with long critical path */
633 #define PRIO_LEVEL 12
634 /* prefer instructions coming early in preorder */
635 #define PRIO_PREORD 8
636 /* weight of current register pressure */
637 #define PRIO_CUR_PRESS 20
638 /* weight of register pressure difference */
639 #define PRIO_CHG_PRESS 8
641 /* priority based selection, heuristic inspired by mueller diss */
642 foreach_ir_nodeset(ns, irn, iter) {
643 /* make sure that branches are scheduled last */
645 int rdiff = get_irn_reg_diff(trace_env, irn);
646 int sign = rdiff < 0;
/* magnitude of the register difference, scaled by PRIO_CHG_PRESS */
647 int chg = (rdiff < 0 ? -rdiff : rdiff) << PRIO_CHG_PRESS;
649 reg_fact = chg * cur_pressure;
/* NOTE(review): INT_MAX - 2 looks like a saturation value for the product above; confirm against the guarding condition */
651 reg_fact = INT_MAX - 2;
/* restore the sign of the register difference */
652 reg_fact = sign ? -reg_fact : reg_fact;
/* critical path length, user count and preorder position raise the priority, the earliest time lowers it */
654 cur_prio = (get_irn_critical_path_len(trace_env, irn) << PRIO_LEVEL)
655 //- (get_irn_delay(trace_env, irn) << PRIO_LEVEL)
656 + (get_irn_num_user(trace_env, irn) << PRIO_NUMSUCCS)
657 - (get_irn_etime(trace_env, irn) << PRIO_TIME)
658 //- ((get_irn_reg_diff(trace_env, irn) >> PRIO_CHG_PRESS) << ((cur_pressure >> PRIO_CUR_PRESS) - 3))
660 + (get_irn_preorder(trace_env, irn) << PRIO_PREORD); /* high preorder means early schedule */
661 if (cur_prio > max_prio) {
/* NOTE(review): some values printed with %d below come from accessors returning unsigned; confirm the debug printf accepts that */
666 DBG((trace_env->dbg, LEVEL_4, "checked NODE %+F\n", irn));
667 DBG((trace_env->dbg, LEVEL_4, "\tpriority: %d\n", cur_prio));
668 DBG((trace_env->dbg, LEVEL_4, "\tpath len: %d (%d)\n", get_irn_critical_path_len(trace_env, irn), get_irn_critical_path_len(trace_env, irn) << PRIO_LEVEL));
669 DBG((trace_env->dbg, LEVEL_4, "\tdelay: %d (%d)\n", get_irn_delay(trace_env, irn), get_irn_delay(trace_env, irn) << PRIO_LEVEL));
670 DBG((trace_env->dbg, LEVEL_4, "\t#user: %d (%d)\n", get_irn_num_user(trace_env, irn), get_irn_num_user(trace_env, irn) << PRIO_NUMSUCCS));
671 DBG((trace_env->dbg, LEVEL_4, "\tetime: %d (%d)\n", get_irn_etime(trace_env, irn), 0 - (get_irn_etime(trace_env, irn) << PRIO_TIME)));
672 DBG((trace_env->dbg, LEVEL_4, "\tpreorder: %d (%d)\n", get_irn_preorder(trace_env, irn), get_irn_preorder(trace_env, irn) << PRIO_PREORD));
673 DBG((trace_env->dbg, LEVEL_4, "\treg diff: %d (%d)\n", get_irn_reg_diff(trace_env, irn), 0 - reg_fact));
674 DBG((trace_env->dbg, LEVEL_4, "\tpressure: %d\n", cur_pressure));
679 DBG((trace_env->dbg, LEVEL_4, "heuristic selected %+F:\n", cand));
/* fallback: let basic_selection() choose from the whole set */
682 cand = basic_selection(ns);
/* Runs the list scheduler on irg with the heuristic selector table. */
688 static void sched_heuristic(ir_graph *irg)
690 static const list_sched_selector_t heuristic_selector = {
694 trace_node_ready, /* node_ready */
695 trace_update_time, /* node_selected */
696 NULL, /* finish_block */
697 trace_free /* finish_graph */
699 be_list_sched_graph(irg, &heuristic_selector);
/* Module constructor: registers the two schedulers of this file under the names "heur" and "muchnik". */
702 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_sched_trace)
703 void be_init_sched_trace(void)
705 be_register_scheduler("heur", sched_heuristic);
706 be_register_scheduler("muchnik", sched_muchnik);