2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Code Placement. Pins all floating nodes to a block where they
23 * will be executed only if needed.
24 * @author Christian Schaefer, Goetz Lindenmaier, Sebastian Felis,
38 * Returns non-zero, is a block is not reachable from Start.
40 * @param block the block to test
43 is_Block_unreachable(ir_node *block) {
44 return is_Block_dead(block) || get_Block_dom_depth(block) < 0;
48 * Find the earliest correct block for node n. --- Place n into the
49 * same Block as its dominance-deepest Input.
51 * We have to avoid calls to get_nodes_block() here
52 * because the graph is floating.
54 * move_out_of_loops() expects that place_floats_early() have placed
55 * all "living" nodes into a living block. That's why we must
56 * move nodes in dead block with "live" successors into a valid
58 * We move them just into the same block as it's successor (or
59 * in case of a Phi into the effective use block). For Phi successors,
60 * this may still be a dead block, but then there is no real use, as
61 * the control flow will be dead later.
63 * @param n the node to be placed
64 * @param worklist a worklist, predecessors of non-floating nodes are placed here
67 place_floats_early(ir_node *n, waitq *worklist) {
70 /* we must not run into an infinite loop */
71 assert(irn_not_visited(n));
74 /* Place floating nodes. */
75 if (get_irn_pinned(n) == op_pin_state_floats) {
76 ir_node *curr_block = get_nodes_block(n);
77 int in_dead_block = is_Block_unreachable(curr_block);
79 ir_node *b = NULL; /* The block to place this node in */
81 assert(is_no_Block(n));
83 if (is_irn_start_block_placed(n)) {
84 /* These nodes will not be placed by the loop below. */
85 b = get_irg_start_block(current_ir_graph);
89 /* find the block for this node. */
90 irn_arity = get_irn_arity(n);
91 for (i = 0; i < irn_arity; i++) {
92 ir_node *pred = get_irn_n(n, i);
95 if ((irn_not_visited(pred))
96 && (get_irn_pinned(pred) == op_pin_state_floats)) {
99 * If the current node is NOT in a dead block, but one of its
100 * predecessors is, we must move the predecessor to a live block.
101 * Such thing can happen, if global CSE chose a node from a dead block.
102 * We move it simply to our block.
103 * Note that neither Phi nor End nodes are floating, so we don't
104 * need to handle them here.
106 if (! in_dead_block) {
107 if (get_irn_pinned(pred) == op_pin_state_floats &&
108 is_Block_unreachable(get_nodes_block(pred)))
109 set_nodes_block(pred, curr_block);
111 place_floats_early(pred, worklist);
115 * A node in the Bad block must stay in the bad block,
116 * so don't compute a new block for it.
121 /* Because all loops contain at least one op_pin_state_pinned node, now all
122 our inputs are either op_pin_state_pinned or place_early() has already
123 been finished on them. We do not have any unfinished inputs! */
124 pred_block = get_nodes_block(pred);
125 if ((!is_Block_dead(pred_block)) &&
126 (get_Block_dom_depth(pred_block) > depth)) {
128 depth = get_Block_dom_depth(pred_block);
130 /* Avoid that the node is placed in the Start block */
132 get_Block_dom_depth(get_nodes_block(n)) > 1 &&
133 get_irg_phase_state(current_ir_graph) != phase_backend) {
134 b = get_Block_cfg_out(get_irg_start_block(current_ir_graph), 0);
135 assert(b != get_irg_start_block(current_ir_graph));
140 set_nodes_block(n, b);
144 * Add predecessors of non floating nodes and non-floating predecessors
145 * of floating nodes to worklist and fix their blocks if the are in dead block.
147 irn_arity = get_irn_arity(n);
151 * Simplest case: End node. Predecessors are keep-alives,
152 * no need to move out of dead block.
154 for (i = -1; i < irn_arity; ++i) {
155 ir_node *pred = get_irn_n(n, i);
156 if (irn_not_visited(pred))
157 waitq_put(worklist, pred);
159 } else if (is_Block(n)) {
161 * Blocks: Predecessors are control flow, no need to move
162 * them out of dead block.
164 for (i = irn_arity - 1; i >= 0; --i) {
165 ir_node *pred = get_irn_n(n, i);
166 if (irn_not_visited(pred))
167 waitq_put(worklist, pred);
169 } else if (is_Phi(n)) {
171 ir_node *curr_block = get_nodes_block(n);
172 int in_dead_block = is_Block_unreachable(curr_block);
175 * Phi nodes: move nodes from dead blocks into the effective use
176 * of the Phi-input if the Phi is not in a bad block.
178 pred = get_nodes_block(n);
179 if (irn_not_visited(pred))
180 waitq_put(worklist, pred);
182 for (i = irn_arity - 1; i >= 0; --i) {
183 ir_node *pred = get_irn_n(n, i);
185 if (irn_not_visited(pred)) {
186 if (! in_dead_block &&
187 get_irn_pinned(pred) == op_pin_state_floats &&
188 is_Block_unreachable(get_nodes_block(pred))) {
189 set_nodes_block(pred, get_Block_cfgpred_block(curr_block, i));
191 waitq_put(worklist, pred);
196 ir_node *curr_block = get_nodes_block(n);
197 int in_dead_block = is_Block_unreachable(curr_block);
200 * All other nodes: move nodes from dead blocks into the same block.
202 pred = get_nodes_block(n);
203 if (irn_not_visited(pred))
204 waitq_put(worklist, pred);
206 for (i = irn_arity - 1; i >= 0; --i) {
207 ir_node *pred = get_irn_n(n, i);
209 if (irn_not_visited(pred)) {
210 if (! in_dead_block &&
211 get_irn_pinned(pred) == op_pin_state_floats &&
212 is_Block_unreachable(get_nodes_block(pred))) {
213 set_nodes_block(pred, curr_block);
215 waitq_put(worklist, pred);
222 * Floating nodes form subgraphs that begin at nodes as Const, Load,
223 * Start, Call and that end at op_pin_state_pinned nodes as Store, Call. Place_early
224 * places all floating nodes reachable from its argument through floating
225 * nodes and adds all beginnings at op_pin_state_pinned nodes to the worklist.
227 * @param worklist a worklist, used for the algorithm, empty on in/output
229 static void place_early(waitq *worklist) {
231 inc_irg_visited(current_ir_graph);
233 /* this inits the worklist */
234 place_floats_early(get_irg_end(current_ir_graph), worklist);
236 /* Work the content of the worklist. */
237 while (!waitq_empty(worklist)) {
238 ir_node *n = waitq_get(worklist);
239 if (irn_not_visited(n))
240 place_floats_early(n, worklist);
243 set_irg_outs_inconsistent(current_ir_graph);
244 set_irg_pinned(current_ir_graph, op_pin_state_pinned);
248 * Compute the deepest common ancestor of block and dca.
250 static ir_node *calc_dca(ir_node *dca, ir_node *block) {
253 /* we do not want to place nodes in dead blocks */
254 if (is_Block_dead(block))
257 /* We found a first legal placement. */
258 if (!dca) return block;
260 /* Find a placement that is dominates both, dca and block. */
261 while (get_Block_dom_depth(block) > get_Block_dom_depth(dca))
262 block = get_Block_idom(block);
264 while (get_Block_dom_depth(dca) > get_Block_dom_depth(block)) {
265 dca = get_Block_idom(dca);
268 while (block != dca) {
269 block = get_Block_idom(block); dca = get_Block_idom(dca);
275 /** Deepest common dominance ancestor of DCA and CONSUMER of PRODUCER.
276 * I.e., DCA is the block where we might place PRODUCER.
277 * A data flow edge points from producer to consumer.
279 static ir_node *consumer_dom_dca(ir_node *dca, ir_node *consumer, ir_node *producer)
281 /* Compute the last block into which we can place a node so that it is
283 if (is_Phi(consumer)) {
284 /* our consumer is a Phi-node, the effective use is in all those
285 blocks through which the Phi-node reaches producer */
286 ir_node *phi_block = get_nodes_block(consumer);
287 int arity = get_irn_arity(consumer);
290 for (i = 0; i < arity; i++) {
291 if (get_Phi_pred(consumer, i) == producer) {
292 ir_node *new_block = get_Block_cfgpred_block(phi_block, i);
294 if (!is_Block_unreachable(new_block))
295 dca = calc_dca(dca, new_block);
299 dca = calc_dca(dca, get_nodes_block(consumer));
305 /* FIXME: the name clashes here with the function from ana/field_temperature.c
307 static INLINE int get_irn_loop_depth(ir_node *n) {
308 return get_loop_depth(get_irn_loop(n));
312 * Move n to a block with less loop depth than it's current block. The
313 * new block must be dominated by early.
315 * @param n the node that should be moved
316 * @param early the earliest block we can n move to
318 static void move_out_of_loops(ir_node *n, ir_node *early) {
323 /* Find the region deepest in the dominator tree dominating
324 dca with the least loop nesting depth, but still dominated
325 by our early placement. */
326 dca = get_nodes_block(n);
329 while (dca != early) {
330 dca = get_Block_idom(dca);
331 if (!dca || is_Bad(dca)) break; /* may be Bad if not reachable from Start */
332 if (get_irn_loop_depth(dca) < get_irn_loop_depth(best)) {
336 if (best != get_nodes_block(n)) {
338 printf("Moving out of loop: "); DDMN(n);
339 printf(" Outermost block: "); DDMN(early);
340 printf(" Best block: "); DDMN(best);
341 printf(" Innermost block: "); DDMN(get_nodes_block(n));
343 set_nodes_block(n, best);
347 /* deepest common ancestor in the dominator tree of all nodes'
348 blocks depending on us; our final placement has to dominate DCA. */
349 static ir_node *get_deepest_common_ancestor(ir_node *node, ir_node *dca)
353 for (i = get_irn_n_outs(node) - 1; i >= 0; --i) {
354 ir_node *succ = get_irn_out(node, i);
358 * This consumer is the End node, a keep alive edge.
359 * This is not a real consumer, so we ignore it
365 dca = get_deepest_common_ancestor(succ, dca);
367 /* ignore if succ is in dead code */
368 ir_node *succ_blk = get_nodes_block(succ);
369 if (is_Block_unreachable(succ_blk))
371 dca = consumer_dom_dca(dca, succ, node);
379 * Put all the Proj nodes of a node into a given block.
381 * @param node the mode_T node
382 * @param block the block to put the Proj nodes to
384 static void set_projs_block(ir_node *node, ir_node *block) {
387 for (i = get_irn_n_outs(node) - 1; i >= 0; --i) {
388 ir_node *succ = get_irn_out(node, i);
390 assert(is_Proj(succ));
392 if (get_irn_mode(succ) == mode_T) {
393 set_projs_block(succ, block);
395 set_nodes_block(succ, block);
400 * Find the latest legal block for N and place N into the
401 * `optimal' Block between the latest and earliest legal block.
402 * The `optimal' block is the dominance-deepest block of those
403 * with the least loop-nesting-depth. This places N out of as many
404 * loops as possible and then makes it as control dependent as
407 * @param n the node to be placed
408 * @param worklist a worklist, all successors of non-floating nodes are
411 static void place_floats_late(ir_node *n, pdeq *worklist) {
415 assert(irn_not_visited(n)); /* no multiple placement */
419 /* no need to place block nodes, control nodes are already placed. */
422 (get_irn_mode(n) != mode_X)) {
423 /* Remember the early_blk placement of this block to move it
424 out of loop no further than the early_blk placement. */
425 early_blk = get_nodes_block(n);
428 * BEWARE: Here we also get code, that is live, but
429 * was in a dead block. If the node is life, but because
430 * of CSE in a dead block, we still might need it.
433 /* Assure that our users are all placed, except the Phi-nodes.
434 --- Each data flow cycle contains at least one Phi-node. We
435 have to break the `user has to be placed before the
436 producer' dependence cycle and the Phi-nodes are the
437 place to do so, because we need to base our placement on the
438 final region of our users, which is OK with Phi-nodes, as they
439 are op_pin_state_pinned, and they never have to be placed after a
440 producer of one of their inputs in the same block anyway. */
441 for (i = get_irn_n_outs(n) - 1; i >= 0; --i) {
442 ir_node *succ = get_irn_out(n, i);
443 if (irn_not_visited(succ) && !is_Phi(succ))
444 place_floats_late(succ, worklist);
447 if (! is_Block_dead(early_blk)) {
448 /* do only move things that where not dead */
449 ir_op *op = get_irn_op(n);
451 /* We have to determine the final block of this node... except for
452 constants and Projs */
453 if ((get_irn_pinned(n) == op_pin_state_floats) &&
455 (op != op_SymConst) &&
458 /* deepest common ancestor in the dominator tree of all nodes'
459 blocks depending on us; our final placement has to dominate
461 ir_node *dca = get_deepest_common_ancestor(n, NULL);
463 set_nodes_block(n, dca);
464 move_out_of_loops(n, early_blk);
465 if (get_irn_mode(n) == mode_T) {
466 set_projs_block(n, get_nodes_block(n));
473 /* Add successors of all non-floating nodes on list. (Those of floating
474 nodes are placed already and therefore are marked.) */
475 n_outs = get_irn_n_outs(n);
476 for (i = 0; i < n_outs; i++) {
477 ir_node *succ = get_irn_out(n, i);
478 if (irn_not_visited(succ)) {
479 pdeq_putr(worklist, succ);
485 * Place floating nodes on the given worklist as late as possible using
486 * the dominance tree.
488 * @param worklist the worklist containing the nodes to place
490 static void place_late(waitq *worklist) {
492 inc_irg_visited(current_ir_graph);
494 /* This fills the worklist initially. */
495 place_floats_late(get_irg_start_block(current_ir_graph), worklist);
497 /* And now empty the worklist again... */
498 while (!waitq_empty(worklist)) {
499 ir_node *n = waitq_get(worklist);
500 if (irn_not_visited(n))
501 place_floats_late(n, worklist);
505 /* Code Placement. */
506 void place_code(ir_graph *irg) {
508 ir_graph *rem = current_ir_graph;
510 current_ir_graph = irg;
512 /* Handle graph state */
513 assert(get_irg_phase_state(irg) != phase_building);
516 if (1 || get_irg_loopinfo_state(irg) != loopinfo_consistent) {
517 free_loop_information(irg);
518 construct_cf_backedges(irg);
521 /* Place all floating nodes as early as possible. This guarantees
522 a legal code placement. */
523 worklist = new_waitq();
524 place_early(worklist);
526 /* place_early() invalidates the outs, place_late needs them. */
527 compute_irg_outs(irg);
529 /* Now move the nodes down in the dominator tree. This reduces the
530 unnecessary executions of the node. */
531 place_late(worklist);
533 set_irg_outs_inconsistent(irg);
534 set_irg_loopinfo_inconsistent(irg);
536 current_ir_graph = rem;