2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Code Placement. Pins all floating nodes to a block where they
23 * will be executed only if needed.
24 * @author Christian Schaefer, Goetz Lindenmaier, Sebastian Felis,
30 #include "iroptimize.h"
37 static int is_Block_unreachable(ir_node *block)
39 return get_Block_dom_depth(block) < 0;
43 * Find the earliest correct block for node n. --- Place n into the
44 * same Block as its dominance-deepest Input.
46 * We have to avoid calls to get_nodes_block() here
47 * because the graph is floating.
49 * move_out_of_loops() expects that place_floats_early() have placed
50 * all "living" nodes into a living block. That's why we must
51 * move nodes in dead block with "live" successors into a valid
53 * We move them just into the same block as its successor (or
54 * in case of a Phi into the effective use block). For Phi successors,
55 * this may still be a dead block, but then there is no real use, as
56 * the control flow will be dead later.
58 * @param n the node to be placed
59 * @param worklist a worklist, predecessors of non-floating nodes are placed here
61 static void place_floats_early(ir_node *n, waitq *worklist)
65 /* we must not run into an infinite loop */
66 assert(!irn_visited(n));
69 /* Place floating nodes. */
70 if (get_irn_pinned(n) == op_pin_state_floats) {
71 ir_node *curr_block = get_nodes_block(n);
72 int in_dead_block = is_Block_unreachable(curr_block);
74 ir_node *b = NULL; /* The block to place this node in */
75 ir_graph *irg = get_irn_irg(n);
79 if (is_irn_start_block_placed(n)) {
80 /* These nodes will not be placed by the loop below. */
81 b = get_irg_start_block(irg);
85 /* find the block for this node. */
86 irn_arity = get_irn_arity(n);
87 for (i = 0; i < irn_arity; i++) {
88 ir_node *pred = get_irn_n(n, i);
91 if (!irn_visited(pred)
92 && (get_irn_pinned(pred) == op_pin_state_floats)) {
95 * If the current node is NOT in a dead block, but one of its
96 * predecessors is, we must move the predecessor to a live
98 * Such thing can happen, if global CSE chose a node from a
99 * dead block. We move it simply to our block.
100 * Note that neither Phi nor End nodes are floating, so we don't
101 * need to handle them here.
103 if (! in_dead_block) {
104 if (get_irn_pinned(pred) == op_pin_state_floats &&
105 is_Block_unreachable(get_nodes_block(pred)))
106 set_nodes_block(pred, curr_block);
108 place_floats_early(pred, worklist);
112 * A node in the Bad block must stay in the bad block,
113 * so don't compute a new block for it.
118 /* Because all loops contain at least one op_pin_state_pinned node,
119 now all our inputs are either op_pin_state_pinned or
120 place_early() has already been finished on them.
121 We do not have any unfinished inputs! */
122 pred_block = get_nodes_block(pred);
123 if (get_Block_dom_depth(pred_block) > depth) {
125 depth = get_Block_dom_depth(pred_block);
127 /* Avoid that the node is placed in the Start block if we are not
128 in the backend phase. */
130 get_Block_dom_depth(get_nodes_block(n)) > 1 &&
131 get_irg_phase_state(irg) != phase_backend) {
132 b = get_Block_cfg_out(get_irg_start_block(irg), 0);
133 assert(b != get_irg_start_block(irg));
138 set_nodes_block(n, b);
142 * Add predecessors of non floating nodes and non-floating predecessors
143 * of floating nodes to worklist and fix their blocks if the are in dead
146 irn_arity = get_irn_arity(n);
150 * Simplest case: End node. Predecessors are keep-alives,
151 * no need to move out of dead block.
153 for (i = -1; i < irn_arity; ++i) {
154 ir_node *pred = get_irn_n(n, i);
155 if (!irn_visited(pred))
156 waitq_put(worklist, pred);
158 } else if (is_Block(n)) {
160 * Blocks: Predecessors are control flow, no need to move
161 * them out of dead block.
163 for (i = irn_arity - 1; i >= 0; --i) {
164 ir_node *pred = get_irn_n(n, i);
165 if (!irn_visited(pred))
166 waitq_put(worklist, pred);
168 } else if (is_Phi(n)) {
170 ir_node *curr_block = get_nodes_block(n);
171 int in_dead_block = is_Block_unreachable(curr_block);
174 * Phi nodes: move nodes from dead blocks into the effective use
175 * of the Phi-input if the Phi is not in a bad block.
177 pred = get_nodes_block(n);
178 if (!irn_visited(pred))
179 waitq_put(worklist, pred);
181 for (i = irn_arity - 1; i >= 0; --i) {
182 ir_node *pred = get_irn_n(n, i);
184 if (!irn_visited(pred)) {
185 if (! in_dead_block &&
186 get_irn_pinned(pred) == op_pin_state_floats &&
187 is_Block_unreachable(get_nodes_block(pred))) {
188 set_nodes_block(pred, get_Block_cfgpred_block(curr_block, i));
190 waitq_put(worklist, pred);
195 ir_node *curr_block = get_nodes_block(n);
196 int in_dead_block = is_Block_unreachable(curr_block);
199 * All other nodes: move nodes from dead blocks into the same block.
201 pred = get_nodes_block(n);
202 if (!irn_visited(pred))
203 waitq_put(worklist, pred);
205 for (i = irn_arity - 1; i >= 0; --i) {
206 ir_node *pred = get_irn_n(n, i);
208 if (!irn_visited(pred)) {
209 if (! in_dead_block &&
210 get_irn_pinned(pred) == op_pin_state_floats &&
211 is_Block_unreachable(get_nodes_block(pred))) {
212 set_nodes_block(pred, curr_block);
214 waitq_put(worklist, pred);
221 * Floating nodes form subgraphs that begin at nodes as Const, Load,
222 * Start, Call and that end at op_pin_state_pinned nodes as Store, Call.
223 * Place_early places all floating nodes reachable from its argument through
224 * floating nodes and adds all beginnings at op_pin_state_pinned nodes to the
227 * @param worklist a worklist, used for the algorithm, empty on in/output
229 static void place_early(ir_graph *irg, waitq *worklist)
232 inc_irg_visited(irg);
234 /* this inits the worklist */
235 place_floats_early(get_irg_end(irg), worklist);
237 /* Work the content of the worklist. */
238 while (!waitq_empty(worklist)) {
239 ir_node *n = (ir_node*)waitq_get(worklist);
241 place_floats_early(n, worklist);
243 set_irg_pinned(irg, op_pin_state_pinned);
247 * Compute the deepest common dominator tree ancestor of block and dca.
249 * @param dca the deepest common dominator tree ancestor so far,
251 * @param block a block
253 * @return the deepest common dominator tree ancestor of block and dca
255 static ir_node *calc_dom_dca(ir_node *dca, ir_node *block)
257 assert(block != NULL);
259 /* We found a first legal placement. */
263 /* Find a placement that is dominates both, dca and block. */
264 while (get_Block_dom_depth(block) > get_Block_dom_depth(dca))
265 block = get_Block_idom(block);
267 while (get_Block_dom_depth(dca) > get_Block_dom_depth(block)) {
268 dca = get_Block_idom(dca);
271 while (block != dca) {
272 block = get_Block_idom(block); dca = get_Block_idom(dca);
278 * Deepest common dominance ancestor of DCA and CONSUMER of PRODUCER.
279 * I.e., DCA is the block where we might place PRODUCER.
280 * A data flow edge points from producer to consumer.
282 static ir_node *consumer_dom_dca(ir_node *dca, ir_node *consumer, ir_node *producer)
284 /* Compute the last block into which we can place a node so that it is
286 if (is_Phi(consumer)) {
287 /* our consumer is a Phi-node, the effective use is in all those
288 blocks through which the Phi-node reaches producer */
289 ir_node *phi_block = get_nodes_block(consumer);
290 int arity = get_irn_arity(consumer);
293 for (i = 0; i < arity; i++) {
294 if (get_Phi_pred(consumer, i) == producer) {
295 ir_node *new_block = get_Block_cfgpred_block(phi_block, i);
296 if (is_Bad(new_block))
299 if (!is_Block_unreachable(new_block))
300 dca = calc_dom_dca(dca, new_block);
304 dca = calc_dom_dca(dca, get_nodes_block(consumer));
309 static inline int get_block_loop_depth(ir_node *block)
311 return get_loop_depth(get_irn_loop(block));
315 * Move n to a block with less loop depth than its current block. The
316 * new block must be dominated by early.
318 * @param n the node that should be moved
319 * @param early the earliest block we can n move to
321 static void move_out_of_loops(ir_node *n, ir_node *early)
327 /* Find the region deepest in the dominator tree dominating
328 dca with the least loop nesting depth, but still dominated
329 by our early placement. */
330 dca = get_nodes_block(n);
333 while (dca != early) {
334 dca = get_Block_idom(dca);
335 if (!dca || is_Bad(dca)) break; /* may be Bad if not reachable from Start */
336 if (get_block_loop_depth(dca) < get_block_loop_depth(best)) {
340 if (best != get_nodes_block(n))
341 set_nodes_block(n, best);
345 * Calculate the deepest common ancestor in the dominator tree of all nodes'
346 * blocks depending on node; our final placement has to dominate DCA.
348 * @param node the definition node
349 * @param dca the deepest common ancestor block so far, initially
352 * @return the deepest common dominator ancestor of all blocks of node's users
354 static ir_node *get_deepest_common_dom_ancestor(ir_node *node, ir_node *dca)
358 for (i = get_irn_n_outs(node) - 1; i >= 0; --i) {
359 ir_node *succ = get_irn_out(node, i);
363 * This consumer is the End node, a keep alive edge.
364 * This is not a real consumer, so we ignore it
370 /* Proj nodes are in the same block as node, so
371 * the users of Proj are our users. */
372 dca = get_deepest_common_dom_ancestor(succ, dca);
374 /* ignore if succ is in dead code */
375 ir_node *succ_blk = get_nodes_block(succ);
376 if (is_Block_unreachable(succ_blk))
378 dca = consumer_dom_dca(dca, succ, node);
385 * Put all the Proj nodes of a node into a given block.
387 * @param node the mode_T node
388 * @param block the block to put the Proj nodes to
390 static void set_projs_block(ir_node *node, ir_node *block)
394 for (i = get_irn_n_outs(node) - 1; i >= 0; --i) {
395 ir_node *succ = get_irn_out(node, i);
397 assert(is_Proj(succ));
399 if (get_irn_mode(succ) == mode_T) {
400 set_projs_block(succ, block);
402 set_nodes_block(succ, block);
407 * Find the latest legal block for N and place N into the
408 * `optimal' Block between the latest and earliest legal block.
409 * The `optimal' block is the dominance-deepest block of those
410 * with the least loop-nesting-depth. This places N out of as many
411 * loops as possible and then makes it as control dependent as
414 * @param n the node to be placed
415 * @param worklist a worklist, all successors of non-floating nodes are
418 static void place_floats_late(ir_node *n, pdeq *worklist)
423 assert(!irn_visited(n)); /* no multiple placement */
427 /* no need to place block nodes, control nodes are already placed. */
430 (get_irn_mode(n) != mode_X)) {
432 /* Remember the early_blk placement of this block to move it
433 out of loop no further than the early_blk placement. */
434 early_blk = get_nodes_block(n);
437 * BEWARE: Here we also get code, that is live, but
438 * was in a dead block. If the node is life, but because
439 * of CSE in a dead block, we still might need it.
442 /* Assure that our users are all placed, except the Phi-nodes.
443 --- Each data flow cycle contains at least one Phi-node. We
444 have to break the `user has to be placed before the
445 producer' dependence cycle and the Phi-nodes are the
446 place to do so, because we need to base our placement on the
447 final region of our users, which is OK with Phi-nodes, as they
448 are op_pin_state_pinned, and they never have to be placed after a
449 producer of one of their inputs in the same block anyway. */
450 for (i = get_irn_n_outs(n) - 1; i >= 0; --i) {
451 ir_node *succ = get_irn_out(n, i);
452 if (!irn_visited(succ) && !is_Phi(succ))
453 place_floats_late(succ, worklist);
456 /* do only move things that where not dead */
459 /* We have to determine the final block of this node... except for
460 constants and Projs */
461 if ((get_irn_pinned(n) == op_pin_state_floats) &&
463 (op != op_SymConst) &&
466 /* deepest common ancestor in the dominator tree of all nodes'
467 blocks depending on us; our final placement has to dominate
469 ir_node *dca = get_deepest_common_dom_ancestor(n, NULL);
471 set_nodes_block(n, dca);
472 move_out_of_loops(n, early_blk);
473 if (get_irn_mode(n) == mode_T) {
474 set_projs_block(n, get_nodes_block(n));
480 /* Add successors of all non-floating nodes on list. (Those of floating
481 nodes are placed already and therefore are marked.) */
482 n_outs = get_irn_n_outs(n);
483 for (i = 0; i < n_outs; i++) {
484 ir_node *succ = get_irn_out(n, i);
485 if (!irn_visited(succ)) {
486 pdeq_putr(worklist, succ);
492 * Place floating nodes on the given worklist as late as possible using
493 * the dominance tree.
495 * @param worklist the worklist containing the nodes to place
497 static void place_late(ir_graph *irg, waitq *worklist)
500 inc_irg_visited(irg);
502 /* This fills the worklist initially. */
503 place_floats_late(get_irg_start_block(irg), worklist);
505 /* And now empty the worklist again... */
506 while (!waitq_empty(worklist)) {
507 ir_node *n = (ir_node*)waitq_get(worklist);
509 place_floats_late(n, worklist);
513 /* Code Placement. */
514 void place_code(ir_graph *irg)
518 remove_critical_cf_edges(irg);
520 /* Handle graph state */
521 assert(get_irg_phase_state(irg) != phase_building);
522 assure_irg_outs(irg);
526 /* Place all floating nodes as early as possible. This guarantees
527 a legal code placement. */
528 worklist = new_waitq();
529 place_early(irg, worklist);
531 /* Note: place_early changes only blocks, no data edges. So, the
532 * data out edges are still valid, no need to recalculate them here. */
534 /* Now move the nodes down in the dominator tree. This reduces the
535 unnecessary executions of the node. */
536 place_late(irg, worklist);
538 set_irg_outs_inconsistent(irg);
539 set_irg_loopinfo_inconsistent(irg);
544 * Wrapper for place_code() inside the place_code pass.
546 static void place_code_wrapper(ir_graph *irg)
548 set_opt_global_cse(1);
549 optimize_graph_df(irg);
551 set_opt_global_cse(0);
554 ir_graph_pass_t *place_code_pass(const char *name)
556 return def_graph_pass(name ? name : "place", place_code_wrapper);