Fixed some typos.

[libfirm] / ir / ir / iropt.c
diff --git a/ir/ir/iropt.c b/ir/ir/iropt.c

index f83d623..ca58b2e 100644 (file)
--- a/ir/ir/iropt.c
+++ b/ir/ir/iropt.c
@@ -352,7 +352,7 @@ static ir_tarval *computed_value_Not(const ir_node *n)
  }  /* computed_value_Not */
  
  /**
- * Tests wether a shift shifts more bits than available in the mode
+ * Tests whether a shift shifts more bits than available in the mode
   */
  static bool is_oversize_shift(const ir_node *n)
  {
@@ -957,7 +957,7 @@ static ir_node *equivalent_node_Sub(ir_node *n)
  
  
  /**
- * Optimize an "self-inverse unary op", ie op(op(n)) = n.
+ * Optimize a "self-inverse unary op", i.e. op(op(n)) = n.
   *
   * @todo
   *   -(-a) == a, but might overflow two times.
@@ -2665,7 +2665,7 @@ make_tuple:
  
                 /* skip a potential Pin */
                 mem = skip_Pin(mem);
-               turn_into_tuple(n, pn_Div_max);
+               turn_into_tuple(n, pn_Div_max+1);
                 set_Tuple_pred(n, pn_Div_M,         mem);
                 set_Tuple_pred(n, pn_Div_X_regular, new_r_Jmp(blk));
                 set_Tuple_pred(n, pn_Div_X_except,  new_r_Bad(irg, mode_X));
@@ -2757,7 +2757,7 @@ make_tuple:
  
                 /* skip a potential Pin */
                 mem = skip_Pin(mem);
-               turn_into_tuple(n, pn_Mod_max);
+               turn_into_tuple(n, pn_Mod_max+1);
                 set_Tuple_pred(n, pn_Mod_M,         mem);
                 set_Tuple_pred(n, pn_Mod_X_regular, new_r_Jmp(blk));
                 set_Tuple_pred(n, pn_Mod_X_except,  new_r_Bad(irg, mode_X));
@@ -2791,7 +2791,7 @@ static ir_node *transform_node_Cond(ir_node *n)
                    Replace it by a tuple (Bad, Jmp) or (Jmp, Bad) */
                 ir_node *blk = get_nodes_block(n);
                 jmp = new_r_Jmp(blk);
-               turn_into_tuple(n, pn_Cond_max);
+               turn_into_tuple(n, pn_Cond_max+1);
                 if (ta == tarval_b_true) {
                         set_Tuple_pred(n, pn_Cond_false, new_r_Bad(irg, mode_X));
                         set_Tuple_pred(n, pn_Cond_true, jmp);
@@ -5147,7 +5147,7 @@ typedef ir_node*(*new_shift_func)(dbg_info *dbgi, ir_node *block,
   * then we can use that to minimize the value of Add(x, const) or
   * Sub(Const, x). In particular this often avoids 1 instruction in some
   * backends for the Shift(x, Sub(Const, y)) case because it can be replaced
- * by Shift(x, Minus(y)) which doesnt't need an explicit Const constructed.
+ * by Shift(x, Minus(y)) which does not need an explicit Const constructed.
   */
  static ir_node *transform_node_shift_modulo(ir_node *n,
                                              new_shift_func new_shift)
@@ -5384,7 +5384,7 @@ static ir_node *transform_node_End(ir_node *n)
                 /* no need to keep Bad */
                 if (is_Bad(ka))
                         continue;
-               /* dont keep unreachable code */
+               /* do not keep unreachable code */
                 block = is_Block(ka) ? ka : get_nodes_block(ka);
                 if (is_block_unreachable(block))
                         continue;
@@ -5503,9 +5503,6 @@ static ir_node *transform_node_Mux(ir_node *n)
         ir_node  *f    = get_Mux_false(n);
         ir_graph *irg  = get_irn_irg(n);
  
-       if (is_irg_state(irg, IR_GRAPH_STATE_KEEP_MUX))
-               return n;
-
         /* implement integer abs: abs(x) = x^(x >>s 31) - (x >>s 31) */
         if (get_mode_arithmetic(mode) == irma_twos_complement) {
                 int abs = ir_mux_is_abs(sel, t, f);
@@ -5527,6 +5524,9 @@ static ir_node *transform_node_Mux(ir_node *n)
                 }
         }
  
+       if (is_irg_state(irg, IR_GRAPH_STATE_KEEP_MUX))
+               return n;
+
         if (is_Mux(t)) {
                 ir_node*  block = get_nodes_block(n);
                 ir_node*  c0    = sel;
@@ -5833,7 +5833,7 @@ static ir_node *transform_node_Load(ir_node *n)
                         ir_node  *bad   = new_r_Bad(irg, mode_X);
                         ir_mode  *mode  = get_Load_mode(n);
                         ir_node  *res   = new_r_Proj(pred_load, mode, pn_Load_res);
-                       ir_node  *in[pn_Load_max] = { mem, jmp, bad, res };
+                       ir_node  *in[pn_Load_max+1] = { mem, res, jmp, bad };
                         ir_node  *tuple = new_r_Tuple(block, ARRAY_SIZE(in), in);
                         return tuple;
                 }
@@ -5853,7 +5853,7 @@ static ir_node *transform_node_Load(ir_node *n)
                         ir_graph *irg   = get_irn_irg(n);
                         ir_node  *bad   = new_r_Bad(irg, mode_X);
                         ir_node  *res   = value;
-                       ir_node  *in[pn_Load_max] = { mem, jmp, bad, res };
+                       ir_node  *in[pn_Load_max+1] = { mem, res, jmp, bad };
                         ir_node  *tuple = new_r_Tuple(block, ARRAY_SIZE(in), in);
                         return tuple;
                 }
@@ -6316,10 +6316,23 @@ int identities_cmp(const void *elt, const void *key)
                 /* for pinned nodes, the block inputs must be equal */
                 if (get_irn_n(a, -1) != get_irn_n(b, -1))
                         return 1;
-       } else if (! get_opt_global_cse()) {
-               /* for block-local CSE both nodes must be in the same Block */
-               if (get_nodes_block(a) != get_nodes_block(b))
-                       return 1;
+       } else {
+               ir_node *block_a = get_nodes_block(a);
+               ir_node *block_b = get_nodes_block(b);
+               if (! get_opt_global_cse()) {
+                       /* for block-local CSE both nodes must be in the same Block */
+                       if (block_a != block_b)
+                               return 1;
+               } else {
+                       /* The optimistic approach would be to do nothing here.
+                        * However, doing GCSE optimistically produces a lot of partially dead code which appears
+                        * to be worse in practice than the missed opportunities.
+                        * So we use a very conservative variant here and only CSE if one value dominates the
+                        * other. */
+                       if (!block_dominates(block_a, block_b)
+                           && !block_dominates(block_b, block_a))
+                           return 1;
+               }
         }
  
         /* compare a->in[0..ins] with b->in[0..ins] */