2 * This file is part of libFirm.
3 * Copyright (C) 2012 University of Karlsruhe.
8 * @brief Move nodes to a block where they will be executed the least
10 * @author Christian Schaefer, Goetz Lindenmaier, Sebastian Felis,
13 * The idea here is to push nodes as deep into the dominance tree as their
14 * dependencies allow. After pushing them back up out of as many loops as
21 #include "iroptimize.h"
24 #include "iredges_t.h"
28 static bool is_block_reachable(ir_node *block)
30 return get_Block_dom_depth(block) >= 0;
34 * Find the earliest correct block for node n. --- Place n into the
35 * same Block as its dominance-deepest Input.
37 * move_out_of_loops() expects that place_floats_early() have placed
38 * all "living" nodes into a living block. That's why we must
39 * move nodes in dead block with "live" successors into a valid
41 * We move them just into the same block as its successor (or
42 * in case of a Phi into the effective use block). For Phi successors,
43 * this may still be a dead block, but then there is no real use, as
44 * the control flow will be dead later.
46 static void place_floats_early(ir_node *n, waitq *worklist)
56 /* we must not run into an infinite loop */
57 if (irn_visited_else_mark(n))
60 /* The algorithm relies on the fact that all predecessors of a block are
61 * moved up after a call to place_float_early of the predecessors
62 * (see the loop below).
63 * However we have to break cycles somewhere. Relying on the visited flag
64 * will result in nodes not being moved up despite their place_floats_early
66 * Instead we break cycles at pinned nodes which won't move anyway:
67 * This works because in firm each cycle contains a Phi or Block node
70 if (get_irn_pinned(n) != op_pin_state_floats || only_used_by_keepalive(n)) {
71 /* we can't move pinned nodes */
72 arity = get_irn_arity(n);
73 for (i = 0; i < arity; ++i) {
74 ir_node *pred = get_irn_n(n, i);
75 pdeq_putr(worklist, pred);
78 pdeq_putr(worklist, get_nodes_block(n));
82 block = get_nodes_block(n);
84 /* first move predecessors up */
85 arity = get_irn_arity(n);
86 place_floats_early(block, worklist);
87 for (i = 0; i < arity; ++i) {
88 ir_node *pred = get_irn_n(n, i);
89 place_floats_early(pred, worklist);
92 /* determine earliest point */
95 for (i = 0; i < arity; ++i) {
96 ir_node *pred = get_irn_n(n, i);
97 ir_node *pred_block = get_nodes_block(pred);
98 int pred_depth = get_Block_dom_depth(pred_block);
99 if (pred_depth > new_depth) {
100 new_depth = pred_depth;
101 new_block = pred_block;
105 /* avoid moving nodes into the start block if we are not in the backend */
106 irg = get_irn_irg(n);
107 start_block = get_irg_start_block(irg);
108 if (new_block == start_block && block != start_block &&
109 !irg_is_constrained(irg, IR_GRAPH_CONSTRAINT_BACKEND)) {
110 assert(get_irn_n_edges_kind(start_block, EDGE_KIND_BLOCK) == 1);
111 const ir_edge_t *edge = get_block_succ_first(start_block);
112 new_block = get_edge_src_irn(edge);
115 /* Set the new block */
116 if (new_block != NULL)
117 set_nodes_block(n, new_block);
121 * Floating nodes form subgraphs that begin at nodes as Const, Load,
122 * Start, Call and that end at op_pin_state_pinned nodes as Store, Call.
123 * Place_early places all floating nodes reachable from its argument through
124 * floating nodes and adds all beginnings at op_pin_state_pinned nodes to the
127 * @param worklist a worklist, used for the algorithm, empty on in/output
129 static void place_early(ir_graph *irg, waitq *worklist)
132 inc_irg_visited(irg);
134 /* this inits the worklist */
135 place_floats_early(get_irg_end(irg), worklist);
137 /* Work the content of the worklist. */
138 while (!waitq_empty(worklist)) {
139 ir_node *n = (ir_node*)waitq_get(worklist);
141 place_floats_early(n, worklist);
143 set_irg_pinned(irg, op_pin_state_pinned);
147 * Compute the deepest common dominator tree ancestor of block and dca.
149 * @param dca the deepest common dominator tree ancestor so far,
151 * @param block a block
153 * @return the deepest common dominator tree ancestor of block and dca
155 static ir_node *calc_dom_dca(ir_node *dca, ir_node *block)
157 assert(block != NULL);
159 /* We found a first legal placement. */
163 /* Find a placement that is dominates both, dca and block. */
164 while (get_Block_dom_depth(block) > get_Block_dom_depth(dca))
165 block = get_Block_idom(block);
167 while (get_Block_dom_depth(dca) > get_Block_dom_depth(block)) {
168 dca = get_Block_idom(dca);
171 while (block != dca) {
172 block = get_Block_idom(block); dca = get_Block_idom(dca);
178 * Deepest common dominance ancestor of DCA and CONSUMER of PRODUCER.
179 * I.e., DCA is the block where we might place PRODUCER.
180 * A data flow edge points from producer to consumer.
182 static ir_node *consumer_dom_dca(ir_node *dca, ir_node *consumer,
185 /* Compute the last block into which we can place a node so that it is
187 if (is_Phi(consumer)) {
188 /* our consumer is a Phi-node, the effective use is in all those
189 blocks through which the Phi-node reaches producer */
190 ir_node *phi_block = get_nodes_block(consumer);
191 int arity = get_irn_arity(consumer);
194 for (i = 0; i < arity; i++) {
195 if (get_Phi_pred(consumer, i) == producer) {
196 ir_node *new_block = get_Block_cfgpred_block(phi_block, i);
197 if (is_Bad(new_block))
200 assert(is_block_reachable(new_block));
201 dca = calc_dom_dca(dca, new_block);
205 dca = calc_dom_dca(dca, get_nodes_block(consumer));
210 static inline int get_block_loop_depth(ir_node *block)
212 return get_loop_depth(get_irn_loop(block));
216 * Move n to a block with less loop depth than its current block. The
217 * new block must be dominated by early.
219 * @param n the node that should be moved
220 * @param early the earliest block we can n move to
222 static void move_out_of_loops(ir_node *n, ir_node *early)
224 ir_node *block = get_nodes_block(n);
225 ir_node *best = block;
226 int best_depth = get_block_loop_depth(best);
228 /* Find the region deepest in the dominator tree dominating
229 dca with the least loop nesting depth, but still dominated
230 by our early placement. */
231 while (block != early) {
232 ir_node *idom = get_Block_idom(block);
233 int idom_depth = get_block_loop_depth(idom);
234 if (idom_depth < best_depth) {
236 best_depth = idom_depth;
240 if (best != get_nodes_block(n))
241 set_nodes_block(n, best);
245 * Calculate the deepest common ancestor in the dominator tree of all nodes'
246 * blocks depending on node; our final placement has to dominate DCA.
248 * @param node the definition node
249 * @param dca the deepest common ancestor block so far, initially
252 * @return the deepest common dominator ancestor of all blocks of node's users
254 static ir_node *get_deepest_common_dom_ancestor(ir_node *node, ir_node *dca)
256 foreach_out_edge(node, edge) {
257 ir_node *succ = get_edge_src_irn(edge);
259 /* keepalive edges are special and don't respect the dominance */
264 /* Proj nodes are in the same block as node, so
265 * the users of Proj are our users. */
266 dca = get_deepest_common_dom_ancestor(succ, dca);
268 assert(is_block_reachable(get_nodes_block(succ)));
269 dca = consumer_dom_dca(dca, succ, node);
272 /* respect the keepalive rule: if our only user is a keepalive, then we must
273 * not move the node any further */
275 assert(only_used_by_keepalive(node));
276 return get_nodes_block(node);
279 foreach_out_edge_kind(node, edge, EDGE_KIND_DEP) {
280 ir_node *succ = get_edge_src_irn(edge);
281 assert(is_block_reachable(get_nodes_block(succ)));
282 dca = consumer_dom_dca(dca, succ, node);
288 * Put all the Proj nodes of a node into a given block.
290 * @param node the mode_T node
291 * @param block the block to put the Proj nodes to
293 static void set_projs_block(ir_node *node, ir_node *block)
295 foreach_out_edge(node, edge) {
296 ir_node *succ = get_edge_src_irn(edge);
301 set_nodes_block(succ, block);
302 if (get_irn_mode(succ) == mode_T) {
303 set_projs_block(succ, block);
309 * Find the latest legal block for N and place N into the
310 * `optimal' Block between the latest and earliest legal block.
311 * The `optimal' block is the dominance-deepest block of those
312 * with the least loop-nesting-depth. This places N out of as many
313 * loops as possible and then makes it as control dependent as
316 static void place_floats_late(ir_node *n, pdeq *worklist)
321 if (irn_visited_else_mark(n))
324 /* break cycles at pinned nodes (see place place_floats_early) as to why */
325 if (get_irn_pinned(n) != op_pin_state_floats) {
326 foreach_out_edge(n, edge) {
327 ir_node *succ = get_edge_src_irn(edge);
328 pdeq_putr(worklist, succ);
333 /* place our users */
334 foreach_out_edge(n, edge) {
335 ir_node *succ = get_edge_src_irn(edge);
336 place_floats_late(succ, worklist);
339 /* no point in moving Projs around, they are moved with their predecessor */
342 /* some nodes should simply stay in the startblock */
343 if (is_irn_start_block_placed(n)) {
344 assert(get_nodes_block(n) == get_irg_start_block(get_irn_irg(n)));
348 block = get_nodes_block(n);
349 assert(is_block_reachable(block));
351 /* deepest common ancestor in the dominator tree of all nodes'
352 blocks depending on us; our final placement has to dominate
354 dca = get_deepest_common_dom_ancestor(n, NULL);
356 set_nodes_block(n, dca);
357 move_out_of_loops(n, block);
358 if (get_irn_mode(n) == mode_T) {
359 set_projs_block(n, get_nodes_block(n));
365 * Place floating nodes on the given worklist as late as possible using
366 * the dominance tree.
368 * @param worklist the worklist containing the nodes to place
370 static void place_late(ir_graph *irg, waitq *worklist)
373 inc_irg_visited(irg);
375 /* This fills the worklist initially. */
376 place_floats_late(get_irg_start_block(irg), worklist);
378 /* And now empty the worklist again... */
379 while (!waitq_empty(worklist)) {
380 ir_node *n = (ir_node*)waitq_get(worklist);
382 place_floats_late(n, worklist);
386 /* Code Placement. */
387 void place_code(ir_graph *irg)
391 /* Handle graph state */
392 assure_irg_properties(irg,
393 IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES |
394 IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE |
395 IR_GRAPH_PROPERTY_CONSISTENT_OUT_EDGES |
396 IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE |
397 IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
399 /* Place all floating nodes as early as possible. This guarantees
400 a legal code placement. */
401 worklist = new_waitq();
402 place_early(irg, worklist);
404 /* While GCSE might place nodes in unreachable blocks,
405 * these are now placed in reachable blocks. */
407 /* Note: place_early changes only blocks, no data edges. So, the
408 * data out edges are still valid, no need to recalculate them here. */
410 /* Now move the nodes down in the dominator tree. This reduces the
411 unnecessary executions of the node. */
412 place_late(irg, worklist);
415 confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_CONTROL_FLOW);
419 * Wrapper for place_code() inside the place_code pass.
421 static void place_code_wrapper(ir_graph *irg)
423 set_opt_global_cse(1);
424 optimize_graph_df(irg);
426 set_opt_global_cse(0);
429 ir_graph_pass_t *place_code_pass(const char *name)
431 return def_graph_pass(name ? name : "place", place_code_wrapper);