Let list_for_each_entry(), list_for_each_entry_reverse() and list_for_each_entry_safe...
[libfirm] / ir / opt / code_placement.c
index 06889a6..22f41be 100644 (file)
 
 /**
  * @file
- * @brief    Code Placement.  Pins all floating nodes to a block where they
- *           will be executed only if needed.
+ * @brief    Move nodes to a block where they will be executed the least
+ *           often.
  * @author   Christian Schaefer, Goetz Lindenmaier, Sebastian Felis,
  *           Michael Beck
- * @version  $Id$
+ *
+ * The idea here is to push nodes as deep into the dominance tree as their
+ * dependencies allow. After pushing them back up out of as many loops as
+ * possible.
  */
 #include "config.h"
 
+#include <stdbool.h>
+
+#include "iroptimize.h"
 #include "adt/pdeq.h"
 #include "irnode_t.h"
 #include "irouts.h"
 #include "irgopt.h"
+#include "irpass.h"
 
-/**
- * Returns non-zero, is a block is not reachable from Start.
- *
- * @param block  the block to test
- */
-static int
-is_Block_unreachable(ir_node *block) {
-       return is_Block_dead(block) || get_Block_dom_depth(block) < 0;
+static bool is_block_reachable(ir_node *block)
+{
+       return get_Block_dom_depth(block) >= 0;
 }
 
 /**
  * Find the earliest correct block for node n.  --- Place n into the
  * same Block as its dominance-deepest Input.
  *
- * We have to avoid calls to get_nodes_block() here
- * because the graph is floating.
- *
  * move_out_of_loops() expects that place_floats_early() have placed
  * all "living" nodes into a living block. That's why we must
  * move nodes in dead block with "live" successors into a valid
  * block.
- * We move them just into the same block as it's successor (or
+ * We move them just into the same block as its successor (or
  * in case of a Phi into the effective use block). For Phi successors,
  * this may still be a dead block, but then there is no real use, as
  * the control flow will be dead later.
- *
- * @param n         the node to be placed
- * @param worklist  a worklist, predecessors of non-floating nodes are placed here
  */
-static void
-place_floats_early(ir_node *n, waitq *worklist) {
-       int i, irn_arity;
+static void place_floats_early(ir_node *n, waitq *worklist)
+{
+       int       i;
+       int       arity;
+       ir_node  *block;
+       int       new_depth;
+       ir_graph *irg;
+       ir_node  *start_block;
+       ir_node  *new_block;
 
        /* we must not run into an infinite loop */
-       assert(!irn_visited(n));
-       mark_irn_visited(n);
-
-       /* Place floating nodes. */
-       if (get_irn_pinned(n) == op_pin_state_floats) {
-               ir_node *curr_block = get_nodes_block(n);
-               int in_dead_block   = is_Block_unreachable(curr_block);
-               int depth           = 0;
-               ir_node *b          = NULL;   /* The block to place this node in */
-
-               assert(is_no_Block(n));
-
-               if (is_irn_start_block_placed(n)) {
-                       /* These nodes will not be placed by the loop below. */
-                       b = get_irg_start_block(current_ir_graph);
-                       depth = 1;
-               }
-
-               /* find the block for this node. */
-               irn_arity = get_irn_arity(n);
-               for (i = 0; i < irn_arity; i++) {
+       if (irn_visited_else_mark(n))
+               return;
+
+       /* The algorithm relies on the fact that all predecessors of a block are
+        * moved up after a call to place_float_early of the predecessors
+        * (see the loop below).
+        * However we have to break cycles somewhere. Relying on the visited flag
+        * will result in nodes not being moved up despite their place_floats_early
+        * call.
+        * Instead we break cycles at pinned nodes which won't move anyway:
+        * This works because in firm each cycle contains a Phi or Block node
+        * (which are pinned)
+        */
+       if (get_irn_pinned(n) != op_pin_state_floats) {
+               /* we can't move pinned nodes */
+               arity = get_irn_arity(n);
+               for (i = 0; i < arity; ++i) {
                        ir_node *pred = get_irn_n(n, i);
-                       ir_node *pred_block;
-
-                       if (!irn_visited(pred)
-                           && (get_irn_pinned(pred) == op_pin_state_floats)) {
-
-                               /*
-                                * If the current node is NOT in a dead block, but one of its
-                                * predecessors is, we must move the predecessor to a live block.
-                                * Such thing can happen, if global CSE chose a node from a dead block.
-                                * We move it simply to our block.
-                                * Note that neither Phi nor End nodes are floating, so we don't
-                                * need to handle them here.
-                                */
-                               if (! in_dead_block) {
-                                       if (get_irn_pinned(pred) == op_pin_state_floats &&
-                                               is_Block_unreachable(get_nodes_block(pred)))
-                                               set_nodes_block(pred, curr_block);
-                               }
-                               place_floats_early(pred, worklist);
-                       }
-
-                       /*
-                        * A node in the Bad block must stay in the bad block,
-                        * so don't compute a new block for it.
-                        */
-                       if (in_dead_block)
-                               continue;
-
-                       /* Because all loops contain at least one op_pin_state_pinned node, now all
-                          our inputs are either op_pin_state_pinned or place_early() has already
-                          been finished on them.  We do not have any unfinished inputs!  */
-                       pred_block = get_nodes_block(pred);
-                       if ((!is_Block_dead(pred_block)) &&
-                               (get_Block_dom_depth(pred_block) > depth)) {
-                               b = pred_block;
-                               depth = get_Block_dom_depth(pred_block);
-                       }
-                       /* Avoid that the node is placed in the Start block if we are not in the
-                          backend phase. */
-                       if (depth == 1 &&
-                                       get_Block_dom_depth(get_nodes_block(n)) > 1 &&
-                                       get_irg_phase_state(current_ir_graph) != phase_backend) {
-                               b = get_Block_cfg_out(get_irg_start_block(current_ir_graph), 0);
-                               assert(b != get_irg_start_block(current_ir_graph));
-                               depth = 2;
-                       }
+                       pdeq_putr(worklist, pred);
                }
-               if (b)
-                       set_nodes_block(n, b);
+               if (!is_Block(n))
+                       pdeq_putr(worklist, get_nodes_block(n));
+               return;
        }
 
-       /*
-        * Add predecessors of non floating nodes and non-floating predecessors
-        * of floating nodes to worklist and fix their blocks if the are in dead block.
-        */
-       irn_arity = get_irn_arity(n);
-
-       if (is_End(n)) {
-               /*
-                * Simplest case: End node. Predecessors are keep-alives,
-                * no need to move out of dead block.
-                */
-               for (i = -1; i < irn_arity; ++i) {
-                       ir_node *pred = get_irn_n(n, i);
-                       if (!irn_visited(pred))
-                               waitq_put(worklist, pred);
-               }
-       } else if (is_Block(n)) {
-               /*
-                * Blocks: Predecessors are control flow, no need to move
-                * them out of dead block.
-                */
-               for (i = irn_arity - 1; i >= 0; --i) {
-                       ir_node *pred = get_irn_n(n, i);
-                       if (!irn_visited(pred))
-                               waitq_put(worklist, pred);
-               }
-       } else if (is_Phi(n)) {
-               ir_node *pred;
-               ir_node *curr_block = get_nodes_block(n);
-               int in_dead_block   = is_Block_unreachable(curr_block);
-
-               /*
-                * Phi nodes: move nodes from dead blocks into the effective use
-                * of the Phi-input if the Phi is not in a bad block.
-                */
-               pred = get_nodes_block(n);
-               if (!irn_visited(pred))
-                       waitq_put(worklist, pred);
-
-               for (i = irn_arity - 1; i >= 0; --i) {
-                       ir_node *pred = get_irn_n(n, i);
+       block = get_nodes_block(n);
 
-                       if (!irn_visited(pred)) {
-                               if (! in_dead_block &&
-                                       get_irn_pinned(pred) == op_pin_state_floats &&
-                                       is_Block_unreachable(get_nodes_block(pred))) {
-                                       set_nodes_block(pred, get_Block_cfgpred_block(curr_block, i));
-                               }
-                               waitq_put(worklist, pred);
-                       }
-               }
-       } else {
-               ir_node *pred;
-               ir_node *curr_block = get_nodes_block(n);
-               int in_dead_block   = is_Block_unreachable(curr_block);
-
-               /*
-                * All other nodes: move nodes from dead blocks into the same block.
-                */
-               pred = get_nodes_block(n);
-               if (!irn_visited(pred))
-                       waitq_put(worklist, pred);
-
-               for (i = irn_arity - 1; i >= 0; --i) {
-                       ir_node *pred = get_irn_n(n, i);
+       /* first move predecessors up */
+       arity = get_irn_arity(n);
+       place_floats_early(block, worklist);
+       for (i = 0; i < arity; ++i) {
+               ir_node *pred = get_irn_n(n, i);
+               place_floats_early(pred, worklist);
+       }
 
-                       if (!irn_visited(pred)) {
-                               if (! in_dead_block &&
-                                       get_irn_pinned(pred) == op_pin_state_floats &&
-                                       is_Block_unreachable(get_nodes_block(pred))) {
-                                       set_nodes_block(pred, curr_block);
-                               }
-                               waitq_put(worklist, pred);
-                       }
+       /* determine earliest point */
+       new_block = NULL;
+       new_depth = 0;
+       for (i = 0; i < arity; ++i) {
+               ir_node *pred       = get_irn_n(n, i);
+               ir_node *pred_block = get_nodes_block(pred);
+               int      pred_depth = get_Block_dom_depth(pred_block);
+               if (pred_depth > new_depth) {
+                       new_depth = pred_depth;
+                       new_block = pred_block;
                }
        }
+
+       /* avoid moving nodes into the start block if we are not in the backend */
+       irg         = get_irn_irg(n);
+       start_block = get_irg_start_block(irg);
+       if (new_block == start_block && block != start_block &&
+           get_irg_phase_state(irg) != phase_backend) {
+               assert(get_Block_n_cfg_outs(start_block) == 1);
+               new_block = get_Block_cfg_out(start_block, 0);
+       }
+
+       /* Set the new block */
+       if (new_block != NULL)
+               set_nodes_block(n, new_block);
 }
 
 /**
  * Floating nodes form subgraphs that begin at nodes as Const, Load,
- * Start, Call and that end at op_pin_state_pinned nodes as Store, Call.  Place_early
- * places all floating nodes reachable from its argument through floating
- * nodes and adds all beginnings at op_pin_state_pinned nodes to the worklist.
+ * Start, Call and that end at op_pin_state_pinned nodes as Store, Call.
+ * Place_early places all floating nodes reachable from its argument through
+ * floating nodes and adds all beginnings at op_pin_state_pinned nodes to the
+ * worklist.
  *
  * @param worklist   a worklist, used for the algorithm, empty on in/output
  */
-static void place_early(waitq *worklist) {
+static void place_early(ir_graph *irg, waitq *worklist)
+{
        assert(worklist);
-       inc_irg_visited(current_ir_graph);
+       inc_irg_visited(irg);
 
        /* this inits the worklist */
-       place_floats_early(get_irg_end(current_ir_graph), worklist);
+       place_floats_early(get_irg_end(irg), worklist);
 
        /* Work the content of the worklist. */
        while (!waitq_empty(worklist)) {
-               ir_node *n = waitq_get(worklist);
+               ir_node *n = (ir_node*)waitq_get(worklist);
                if (!irn_visited(n))
                        place_floats_early(n, worklist);
        }
-       set_irg_pinned(current_ir_graph, op_pin_state_pinned);
+       set_irg_pinned(irg, op_pin_state_pinned);
 }
 
 /**
@@ -250,15 +165,13 @@ static void place_early(waitq *worklist) {
  *
  * @return  the deepest common dominator tree ancestor of block and dca
  */
-static ir_node *calc_dom_dca(ir_node *dca, ir_node *block) {
-       assert(block);
-
-       /* we do not want to place nodes in dead blocks */
-       if (is_Block_dead(block))
-               return dca;
+static ir_node *calc_dom_dca(ir_node *dca, ir_node *block)
+{
+       assert(block != NULL);
 
        /* We found a first legal placement. */
-       if (!dca) return block;
+       if (!dca)
+               return block;
 
        /* Find a placement that is dominates both, dca and block. */
        while (get_Block_dom_depth(block) > get_Block_dom_depth(dca))
@@ -279,7 +192,8 @@ static ir_node *calc_dom_dca(ir_node *dca, ir_node *block) {
  * I.e., DCA is the block where we might place PRODUCER.
  * A data flow edge points from producer to consumer.
  */
-static ir_node *consumer_dom_dca(ir_node *dca, ir_node *consumer, ir_node *producer)
+static ir_node *consumer_dom_dca(ir_node *dca, ir_node *consumer,
+                                 ir_node *producer)
 {
        /* Compute the last block into which we can place a node so that it is
           before consumer. */
@@ -293,9 +207,11 @@ static ir_node *consumer_dom_dca(ir_node *dca, ir_node *consumer, ir_node *produ
                for (i = 0;  i < arity; i++) {
                        if (get_Phi_pred(consumer, i) == producer) {
                                ir_node *new_block = get_Block_cfgpred_block(phi_block, i);
+                               if (is_Bad(new_block))
+                                       continue;
 
-                               if (!is_Block_unreachable(new_block))
-                                       dca = calc_dom_dca(dca, new_block);
+                               assert(is_block_reachable(new_block));
+                               dca = calc_dom_dca(dca, new_block);
                        }
                }
        } else {
@@ -304,36 +220,35 @@ static ir_node *consumer_dom_dca(ir_node *dca, ir_node *consumer, ir_node *produ
        return dca;
 }
 
-/* FIXME: the name clashes here with the function from ana/field_temperature.c
- * please rename. */
-static INLINE int get_irn_loop_depth(ir_node *n) {
-       return get_loop_depth(get_irn_loop(n));
+static inline int get_block_loop_depth(ir_node *block)
+{
+       return get_loop_depth(get_irn_loop(block));
 }
 
 /**
- * Move n to a block with less loop depth than it's current block. The
+ * Move n to a block with less loop depth than its current block. The
  * new block must be dominated by early.
  *
  * @param n      the node that should be moved
  * @param early  the earliest block we can n move to
  */
-static void move_out_of_loops(ir_node *n, ir_node *early) {
-       ir_node *best, *dca;
-       assert(n && early);
-
+static void move_out_of_loops(ir_node *n, ir_node *early)
+{
+       ir_node *block      = get_nodes_block(n);
+       ir_node *best       = block;
+       int      best_depth = get_block_loop_depth(best);
 
        /* Find the region deepest in the dominator tree dominating
           dca with the least loop nesting depth, but still dominated
           by our early placement. */
-       dca = get_nodes_block(n);
-
-       best = dca;
-       while (dca != early) {
-               dca = get_Block_idom(dca);
-               if (!dca || is_Bad(dca)) break; /* may be Bad if not reachable from Start */
-               if (get_irn_loop_depth(dca) < get_irn_loop_depth(best)) {
-                       best = dca;
+       while (block != early) {
+               ir_node *idom       = get_Block_idom(block);
+               int      idom_depth = get_block_loop_depth(idom);
+               if (idom_depth < best_depth) {
+                       best       = idom;
+                       best_depth = idom_depth;
                }
+               block = idom;
        }
        if (best != get_nodes_block(n))
                set_nodes_block(n, best);
@@ -349,29 +264,23 @@ static void move_out_of_loops(ir_node *n, ir_node *early) {
  *
  * @return the deepest common dominator ancestor of all blocks of node's users
  */
-static ir_node *get_deepest_common_dom_ancestor(ir_node *node, ir_node *dca) {
+static ir_node *get_deepest_common_dom_ancestor(ir_node *node, ir_node *dca)
+{
        int i;
 
        for (i = get_irn_n_outs(node) - 1; i >= 0; --i) {
                ir_node *succ = get_irn_out(node, i);
 
-               if (is_End(succ)) {
-                       /*
-                        * This consumer is the End node, a keep alive edge.
-                        * This is not a real consumer, so we ignore it
-                        */
+               /* keepalive edges are special and don't respect the dominance */
+               if (is_End(succ))
                        continue;
-               }
 
                if (is_Proj(succ)) {
                        /* Proj nodes are in the same block as node, so
                         * the users of Proj are our users. */
                        dca = get_deepest_common_dom_ancestor(succ, dca);
                } else {
-                       /* ignore if succ is in dead code */
-                       ir_node *succ_blk = get_nodes_block(succ);
-                       if (is_Block_unreachable(succ_blk))
-                               continue;
+                       assert(is_block_reachable(get_nodes_block(succ)));
                        dca = consumer_dom_dca(dca, succ, node);
                }
        }
@@ -384,7 +293,8 @@ static ir_node *get_deepest_common_dom_ancestor(ir_node *node, ir_node *dca) {
  * @param node   the mode_T node
  * @param block  the block to put the Proj nodes to
  */
-static void set_projs_block(ir_node *node, ir_node *block) {
+static void set_projs_block(ir_node *node, ir_node *block)
+{
        int i;
 
        for (i = get_irn_n_outs(node) - 1; i >= 0; --i) {
@@ -406,80 +316,54 @@ static void set_projs_block(ir_node *node, ir_node *block) {
  * with the least loop-nesting-depth.  This places N out of as many
  * loops as possible and then makes it as control dependent as
  * possible.
- *
- * @param n         the node to be placed
- * @param worklist  a worklist, all successors of non-floating nodes are
- *                  placed here
  */
-static void place_floats_late(ir_node *n, pdeq *worklist) {
-       int i, n_outs;
-       ir_node *early_blk;
-
-       assert(!irn_visited(n)); /* no multiple placement */
-
-       mark_irn_visited(n);
-
-       /* no need to place block nodes, control nodes are already placed. */
-       if (!is_Block(n) &&
-           (!is_cfop(n)) &&
-           (get_irn_mode(n) != mode_X)) {
-               /* Remember the early_blk placement of this block to move it
-                  out of loop no further than the early_blk placement. */
-               early_blk = get_nodes_block(n);
-
-               /*
-                * BEWARE: Here we also get code, that is live, but
-                * was in a dead block.  If the node is life, but because
-                * of CSE in a dead block, we still might need it.
-                */
-
-               /* Assure that our users are all placed, except the Phi-nodes.
-               --- Each data flow cycle contains at least one Phi-node.  We
-                   have to break the `user has to be placed before the
-                   producer' dependence cycle and the Phi-nodes are the
-                   place to do so, because we need to base our placement on the
-                   final region of our users, which is OK with Phi-nodes, as they
-                   are op_pin_state_pinned, and they never have to be placed after a
-                   producer of one of their inputs in the same block anyway. */
-               for (i = get_irn_n_outs(n) - 1; i >= 0; --i) {
-                       ir_node *succ = get_irn_out(n, i);
-                       if (!irn_visited(succ) && !is_Phi(succ))
-                               place_floats_late(succ, worklist);
-               }
+static void place_floats_late(ir_node *n, pdeq *worklist)
+{
+       int      n_outs;
+       int      i;
+       ir_node *block;
+       ir_node *dca;
 
-               if (! is_Block_dead(early_blk)) {
-                       /* do only move things that where not dead */
-                       ir_op *op = get_irn_op(n);
-
-                       /* We have to determine the final block of this node... except for
-                          constants and Projs */
-                       if ((get_irn_pinned(n) == op_pin_state_floats) &&
-                           (op != op_Const)    &&
-                           (op != op_SymConst) &&
-                           (op != op_Proj))
-                       {
-                               /* deepest common ancestor in the dominator tree of all nodes'
-                                  blocks depending on us; our final placement has to dominate
-                                  DCA. */
-                               ir_node *dca = get_deepest_common_dom_ancestor(n, NULL);
-                               if (dca != NULL) {
-                                       set_nodes_block(n, dca);
-                                       move_out_of_loops(n, early_blk);
-                                       if (get_irn_mode(n) == mode_T) {
-                                               set_projs_block(n, get_nodes_block(n));
-                                       }
-                               }
-                       }
+       if (irn_visited_else_mark(n))
+               return;
+
+       n_outs = get_irn_n_outs(n);
+       /* break cycles at pinned nodes (see place place_floats_early) as to why */
+       if (get_irn_pinned(n) != op_pin_state_floats) {
+               for (i = 0; i < n_outs; ++i) {
+                       ir_node *succ = get_irn_out(n, i);
+                       pdeq_putr(worklist, succ);
                }
+               return;
        }
 
-       /* Add successors of all non-floating nodes on list. (Those of floating
-          nodes are placed already and therefore are marked.)  */
-       n_outs = get_irn_n_outs(n);
-       for (i = 0; i < n_outs; i++) {
+       /* place our users */
+       for (i = 0; i < n_outs; ++i) {
                ir_node *succ = get_irn_out(n, i);
-               if (!irn_visited(succ)) {
-                       pdeq_putr(worklist, succ);
+               place_floats_late(succ, worklist);
+       }
+
+       /* no point in moving Projs around, they are moved with their predecessor */
+       if (is_Proj(n))
+               return;
+       /* some nodes should simply stay in the startblock */
+       if (is_irn_start_block_placed(n)) {
+               assert(get_nodes_block(n) == get_irg_start_block(get_irn_irg(n)));
+               return;
+       }
+
+       block = get_nodes_block(n);
+       assert(is_block_reachable(block));
+
+       /* deepest common ancestor in the dominator tree of all nodes'
+          blocks depending on us; our final placement has to dominate
+          DCA. */
+       dca = get_deepest_common_dom_ancestor(n, NULL);
+       if (dca != NULL) {
+               set_nodes_block(n, dca);
+               move_out_of_loops(n, block);
+               if (get_irn_mode(n) == mode_T) {
+                       set_projs_block(n, get_nodes_block(n));
                }
        }
 }
@@ -490,52 +374,67 @@ static void place_floats_late(ir_node *n, pdeq *worklist) {
  *
  * @param worklist   the worklist containing the nodes to place
  */
-static void place_late(waitq *worklist) {
+static void place_late(ir_graph *irg, waitq *worklist)
+{
        assert(worklist);
-       inc_irg_visited(current_ir_graph);
+       inc_irg_visited(irg);
 
        /* This fills the worklist initially. */
-       place_floats_late(get_irg_start_block(current_ir_graph), worklist);
+       place_floats_late(get_irg_start_block(irg), worklist);
 
        /* And now empty the worklist again... */
        while (!waitq_empty(worklist)) {
-               ir_node *n = waitq_get(worklist);
+               ir_node *n = (ir_node*)waitq_get(worklist);
                if (!irn_visited(n))
                        place_floats_late(n, worklist);
        }
 }
 
 /* Code Placement. */
-void place_code(ir_graph *irg) {
+void place_code(ir_graph *irg)
+{
        waitq *worklist;
-       ir_graph *rem = current_ir_graph;
-
-       current_ir_graph = irg;
-       remove_critical_cf_edges(irg);
 
        /* Handle graph state */
        assert(get_irg_phase_state(irg) != phase_building);
-       assure_doms(irg);
-
-       if (1 || get_irg_loopinfo_state(irg) != loopinfo_consistent) {
-               free_loop_information(irg);
-               construct_cf_backedges(irg);
-       }
+       assure_irg_properties(irg,
+               IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES |
+               IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE |
+               IR_GRAPH_PROPERTY_CONSISTENT_OUTS |
+               IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE |
+               IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
 
        /* Place all floating nodes as early as possible. This guarantees
         a legal code placement. */
        worklist = new_waitq();
-       place_early(worklist);
+       place_early(irg, worklist);
+
+       /* While GCSE might place nodes in unreachable blocks,
+        * these are now placed in reachable blocks. */
 
        /* Note: place_early changes only blocks, no data edges. So, the
         * data out edges are still valid, no need to recalculate them here. */
 
        /* Now move the nodes down in the dominator tree. This reduces the
           unnecessary executions of the node. */
-       place_late(worklist);
+       place_late(irg, worklist);
 
-       set_irg_outs_inconsistent(irg);
-       set_irg_loopinfo_inconsistent(irg);
        del_waitq(worklist);
-       current_ir_graph = rem;
+       confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
+}
+
+/**
+ * Wrapper for place_code() inside the place_code pass.
+ */
+static void place_code_wrapper(ir_graph *irg)
+{
+       set_opt_global_cse(1);
+       optimize_graph_df(irg);
+       place_code(irg);
+       set_opt_global_cse(0);
+}
+
+ir_graph_pass_t *place_code_pass(const char *name)
+{
+       return def_graph_pass(name ? name : "place", place_code_wrapper);
 }