2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Move nodes to a block where they will be executed the least
24 * @author Christian Schaefer, Goetz Lindenmaier, Sebastian Felis,
28 * The idea here is to push nodes as deep into the dominance tree as their
29 * dependencies allow. After pushing them back up out of as many loops as
36 #include "iroptimize.h"
43 static bool is_block_reachable(ir_node *block)
45 return get_Block_dom_depth(block) >= 0;
48 /* mark node as not-visited */
49 static void clear_irn_visited(ir_node *node)
51 ir_graph *irg = get_irn_irg(node);
52 ir_visited_t visited = get_irg_visited(irg);
53 set_irn_visited(node, visited-1);
57 * Find the earliest correct block for node n. --- Place n into the
58 * same Block as its dominance-deepest Input.
60 * move_out_of_loops() expects that place_floats_early() have placed
61 * all "living" nodes into a living block. That's why we must
62 * move nodes in dead block with "live" successors into a valid
64 * We move them just into the same block as its successor (or
65 * in case of a Phi into the effective use block). For Phi successors,
66 * this may still be a dead block, but then there is no real use, as
67 * the control flow will be dead later.
69 static void place_floats_early(ir_node *n, waitq *worklist)
79 /* we must not run into an infinite loop */
80 if (irn_visited_else_mark(n))
83 /* The algorithm relies on the fact that all predecessors of a block are
84 * moved up after a call to place_float_early of the predecessors
85 * (see the loop below).
86 * However we have to break cycles somewhere. Relying on the visited flag
87 * will result in nodes not being moved up despite their place_floats_early
89 * Instead we break cycles at pinned nodes which won't move anyway:
90 * This works because in firm each cycle contains a Phi or Block node
93 if (get_irn_pinned(n) != op_pin_state_floats) {
94 /* we can't move pinned nodes */
95 arity = get_irn_arity(n);
96 for (i = 0; i < arity; ++i) {
97 ir_node *pred = get_irn_n(n, i);
98 pdeq_putr(worklist, pred);
101 pdeq_putr(worklist, get_nodes_block(n));
105 block = get_nodes_block(n);
106 /* do not move unreachable code (or its predecessors around) since dominance
108 if (!is_block_reachable(block))
111 /* first move predecessors up */
112 arity = get_irn_arity(n);
113 place_floats_early(block, worklist);
114 for (i = 0; i < arity; ++i) {
115 ir_node *pred = get_irn_n(n, i);
116 ir_node *pred_block = get_nodes_block(pred);
118 /* gcse can lead to predecessors of reachable code being unreachable.
119 * Move them into the current block in this case */
120 if (!is_block_reachable(pred_block)) {
121 ir_node *new_pred_block = block;
122 assert(get_irn_pinned(pred) == op_pin_state_floats);
124 new_pred_block = get_Block_cfgpred_block(block, i);
126 set_nodes_block(pred, new_pred_block);
127 clear_irn_visited(pred);
129 place_floats_early(pred, worklist);
132 /* determine earliest point */
135 for (i = 0; i < arity; ++i) {
136 ir_node *pred = get_irn_n(n, i);
137 ir_node *pred_block = get_nodes_block(pred);
138 int pred_depth = get_Block_dom_depth(pred_block);
139 if (pred_depth > new_depth) {
140 new_depth = pred_depth;
141 new_block = pred_block;
145 /* avoid moving nodes into the start block if we are not in the backend */
146 irg = get_irn_irg(n);
147 start_block = get_irg_start_block(irg);
148 if (new_block == start_block && block != start_block &&
149 get_irg_phase_state(irg) != phase_backend) {
150 assert(get_Block_n_cfg_outs(start_block) == 1);
151 new_block = get_Block_cfg_out(start_block, 0);
154 /* Set the new block */
155 if (new_block != NULL)
156 set_nodes_block(n, new_block);
160 * Floating nodes form subgraphs that begin at nodes as Const, Load,
161 * Start, Call and that end at op_pin_state_pinned nodes as Store, Call.
162 * Place_early places all floating nodes reachable from its argument through
163 * floating nodes and adds all beginnings at op_pin_state_pinned nodes to the
166 * @param worklist a worklist, used for the algorithm, empty on in/output
168 static void place_early(ir_graph *irg, waitq *worklist)
171 inc_irg_visited(irg);
173 /* this inits the worklist */
174 place_floats_early(get_irg_end(irg), worklist);
176 /* Work the content of the worklist. */
177 while (!waitq_empty(worklist)) {
178 ir_node *n = (ir_node*)waitq_get(worklist);
180 place_floats_early(n, worklist);
182 set_irg_pinned(irg, op_pin_state_pinned);
186 * Compute the deepest common dominator tree ancestor of block and dca.
188 * @param dca the deepest common dominator tree ancestor so far,
190 * @param block a block
192 * @return the deepest common dominator tree ancestor of block and dca
194 static ir_node *calc_dom_dca(ir_node *dca, ir_node *block)
196 assert(block != NULL);
198 /* We found a first legal placement. */
202 /* Find a placement that is dominates both, dca and block. */
203 while (get_Block_dom_depth(block) > get_Block_dom_depth(dca))
204 block = get_Block_idom(block);
206 while (get_Block_dom_depth(dca) > get_Block_dom_depth(block)) {
207 dca = get_Block_idom(dca);
210 while (block != dca) {
211 block = get_Block_idom(block); dca = get_Block_idom(dca);
217 * Deepest common dominance ancestor of DCA and CONSUMER of PRODUCER.
218 * I.e., DCA is the block where we might place PRODUCER.
219 * A data flow edge points from producer to consumer.
221 static ir_node *consumer_dom_dca(ir_node *dca, ir_node *consumer,
224 /* Compute the last block into which we can place a node so that it is
226 if (is_Phi(consumer)) {
227 /* our consumer is a Phi-node, the effective use is in all those
228 blocks through which the Phi-node reaches producer */
229 ir_node *phi_block = get_nodes_block(consumer);
230 int arity = get_irn_arity(consumer);
233 for (i = 0; i < arity; i++) {
234 if (get_Phi_pred(consumer, i) == producer) {
235 ir_node *new_block = get_Block_cfgpred_block(phi_block, i);
236 if (is_Bad(new_block))
239 if (is_block_reachable(new_block))
240 dca = calc_dom_dca(dca, new_block);
244 dca = calc_dom_dca(dca, get_nodes_block(consumer));
249 static inline int get_block_loop_depth(ir_node *block)
251 return get_loop_depth(get_irn_loop(block));
255 * Move n to a block with less loop depth than its current block. The
256 * new block must be dominated by early.
258 * @param n the node that should be moved
259 * @param early the earliest block we can n move to
261 static void move_out_of_loops(ir_node *n, ir_node *early)
263 ir_node *block = get_nodes_block(n);
264 ir_node *best = block;
265 int best_depth = get_block_loop_depth(best);
267 /* Find the region deepest in the dominator tree dominating
268 dca with the least loop nesting depth, but still dominated
269 by our early placement. */
270 while (block != early) {
271 ir_node *idom = get_Block_idom(block);
272 int idom_depth = get_block_loop_depth(idom);
273 if (idom_depth < best_depth) {
275 best_depth = idom_depth;
279 if (best != get_nodes_block(n))
280 set_nodes_block(n, best);
284 * Calculate the deepest common ancestor in the dominator tree of all nodes'
285 * blocks depending on node; our final placement has to dominate DCA.
287 * @param node the definition node
288 * @param dca the deepest common ancestor block so far, initially
291 * @return the deepest common dominator ancestor of all blocks of node's users
293 static ir_node *get_deepest_common_dom_ancestor(ir_node *node, ir_node *dca)
297 for (i = get_irn_n_outs(node) - 1; i >= 0; --i) {
298 ir_node *succ = get_irn_out(node, i);
300 /* keepalive edges are special and don't respect the dominance */
305 /* Proj nodes are in the same block as node, so
306 * the users of Proj are our users. */
307 dca = get_deepest_common_dom_ancestor(succ, dca);
309 /* ignore successors in unreachable code */
310 ir_node *succ_blk = get_nodes_block(succ);
311 if (!is_block_reachable(succ_blk))
313 dca = consumer_dom_dca(dca, succ, node);
320 * Put all the Proj nodes of a node into a given block.
322 * @param node the mode_T node
323 * @param block the block to put the Proj nodes to
325 static void set_projs_block(ir_node *node, ir_node *block)
329 for (i = get_irn_n_outs(node) - 1; i >= 0; --i) {
330 ir_node *succ = get_irn_out(node, i);
332 assert(is_Proj(succ));
334 if (get_irn_mode(succ) == mode_T) {
335 set_projs_block(succ, block);
337 set_nodes_block(succ, block);
342 * Find the latest legal block for N and place N into the
343 * `optimal' Block between the latest and earliest legal block.
344 * The `optimal' block is the dominance-deepest block of those
345 * with the least loop-nesting-depth. This places N out of as many
346 * loops as possible and then makes it as control dependent as
349 static void place_floats_late(ir_node *n, pdeq *worklist)
356 if (irn_visited_else_mark(n))
359 n_outs = get_irn_n_outs(n);
360 /* break cycles at pinned nodes (see place place_floats_early) as to why */
361 if (get_irn_pinned(n) != op_pin_state_floats) {
362 for (i = 0; i < n_outs; ++i) {
363 ir_node *succ = get_irn_out(n, i);
364 pdeq_putr(worklist, succ);
369 /* place our users */
370 for (i = 0; i < n_outs; ++i) {
371 ir_node *succ = get_irn_out(n, i);
372 place_floats_late(succ, worklist);
375 /* no point in moving Projs around, they are moved with their predecessor */
378 /* some nodes should simply stay in the startblock */
379 if (is_irn_start_block_placed(n)) {
381 ir_graph *irg = get_irn_irg(n);
382 ir_node *start_block = get_irg_start_block(irg);
383 assert(get_nodes_block(n) == start_block);
388 /* don't move unreachable code around */
389 block = get_nodes_block(n);
390 if (!is_block_reachable(block))
393 /* deepest common ancestor in the dominator tree of all nodes'
394 blocks depending on us; our final placement has to dominate
396 dca = get_deepest_common_dom_ancestor(n, NULL);
398 set_nodes_block(n, dca);
399 move_out_of_loops(n, block);
400 if (get_irn_mode(n) == mode_T) {
401 set_projs_block(n, get_nodes_block(n));
407 * Place floating nodes on the given worklist as late as possible using
408 * the dominance tree.
410 * @param worklist the worklist containing the nodes to place
412 static void place_late(ir_graph *irg, waitq *worklist)
415 inc_irg_visited(irg);
417 /* This fills the worklist initially. */
418 place_floats_late(get_irg_start_block(irg), worklist);
420 /* And now empty the worklist again... */
421 while (!waitq_empty(worklist)) {
422 ir_node *n = (ir_node*)waitq_get(worklist);
424 place_floats_late(n, worklist);
428 /* Code Placement. */
429 void place_code(ir_graph *irg)
433 remove_critical_cf_edges(irg);
435 /* Handle graph state */
436 assert(get_irg_phase_state(irg) != phase_building);
437 assure_irg_outs(irg);
441 /* Place all floating nodes as early as possible. This guarantees
442 a legal code placement. */
443 worklist = new_waitq();
444 place_early(irg, worklist);
446 /* Note: place_early changes only blocks, no data edges. So, the
447 * data out edges are still valid, no need to recalculate them here. */
449 /* Now move the nodes down in the dominator tree. This reduces the
450 unnecessary executions of the node. */
451 place_late(irg, worklist);
453 set_irg_outs_inconsistent(irg);
454 set_irg_loopinfo_inconsistent(irg);
459 * Wrapper for place_code() inside the place_code pass.
461 static void place_code_wrapper(ir_graph *irg)
463 set_opt_global_cse(1);
464 optimize_graph_df(irg);
466 set_opt_global_cse(0);
469 ir_graph_pass_t *place_code_pass(const char *name)
471 return def_graph_pass(name ? name : "place", place_code_wrapper);