fixed CopyB emitter
[libfirm] ir/be/ia32/ia32_optimize.c
index dd39b54..755a281 100644
@@ -1,4 +1,4 @@
-/*
+/**
  * Project:     libFIRM
  * File name:   ir/be/ia32/ia32_optimize.c
  * Purpose:     Implements several optimizations for IA32
 #include "ia32_transform.h"
 #include "ia32_dbg_stat.h"
 
+typedef enum {
+       IA32_AM_CAND_NONE  = 0,
+       IA32_AM_CAND_LEFT  = 1,
+       IA32_AM_CAND_RIGHT = 2,
+       IA32_AM_CAND_BOTH  = 3
+} ia32_am_cand_t;
+
 #undef is_NoMem
 #define is_NoMem(irn) (get_irn_op(irn) == op_NoMem)
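The ia32_am_cand_t values introduced above form a bitmask: IA32_AM_CAND_LEFT and IA32_AM_CAND_RIGHT occupy separate bits, so IA32_AM_CAND_BOTH (1 | 2 == 3) is simply their union and callers can test each operand side independently with a bitwise AND. A minimal sketch of how such a value composes (the two flag parameters are illustrative, not taken from the patch):

    static ia32_am_cand_t combine_am_cand(int left_is_load, int right_is_load)
    {
            ia32_am_cand_t cand = IA32_AM_CAND_NONE;

            if (left_is_load)
                    cand |= IA32_AM_CAND_LEFT;
            if (right_is_load)
                    cand |= IA32_AM_CAND_RIGHT;

            /* with both bits set this yields IA32_AM_CAND_BOTH */
            return cand;
    }

optimize_am below relies on exactly this property when it checks cand & IA32_AM_CAND_RIGHT and cand & IA32_AM_CAND_LEFT.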
 
@@ -145,7 +152,7 @@ static entity *get_entity_for_tv(ia32_code_gen_t *cg, ir_node *cnst)
                if (tp == firm_unknown_type)
                        tp = get_prim_type(cg->isa->types, mode);
 
-               res = new_entity(get_glob_type(), unique_id("ia32FloatCnst_%u"), tp);
+               res = new_entity(get_glob_type(), unique_id(".LC%u"), tp);
 
                set_entity_ld_ident(res, get_entity_ident(res));
                set_entity_visibility(res, visibility_local);
@@ -233,6 +240,15 @@ void ia32_place_consts_set_modes(ir_node *irn, void *env) {
                set_irn_mode(irn, mode);
        }
 
+       /*
+               Annotate the mode of the stored value in the link field of the Store,
+               as floating point Conv nodes might be optimized away and we would
+               lose the mode.
+       */
+       if (get_irn_opcode(irn) == iro_Store) {
+               set_irn_link(irn, get_irn_mode(get_Store_value(irn)));
+       }
+
        tenv.block    = get_nodes_block(irn);
        tenv.cg       = cg;
        tenv.irg      = cg->irg;
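Storing the mode in the node's link field is the usual libfirm trick for attaching transient per-node data; any later phase that still holds the Store can read the mode back with get_irn_link even after the feeding Conv has been folded away. A sketch of such a reader (hypothetical consumer code, not part of this patch):

    /* hypothetical: recover the mode remembered by ia32_place_consts_set_modes */
    if (get_irn_opcode(store) == iro_Store) {
            ir_mode *stored_mode = get_irn_link(store);

            if (stored_mode != NULL && mode_is_float(stored_mode)) {
                    /* handle the Store with its original floating point mode */
            }
    }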
@@ -424,24 +440,39 @@ static void ia32_create_Push(ir_node *irn, ia32_code_gen_t *cg) {
        ir_node *sp  = get_irn_n(irn, 0);
        ir_node *val, *next, *push, *bl, *proj_M, *proj_res, *old_proj_M;
        const ir_edge_t *edge;
+       heights_t *h;
 
-       if (get_ia32_am_offs(irn) || !be_is_IncSP(sp))
+       /* do not create push if the store already has an offset assigned or the base is not an IncSP */
+       if (get_ia32_am_offs(irn) || ! be_is_IncSP(sp))
                return;
 
+       /* do not create push if index is not NOREG */
        if (arch_get_irn_register(cg->arch_env, get_irn_n(irn, 1)) !=
                &ia32_gp_regs[REG_GP_NOREG])
                return;
 
+       /* do not create push for floating point */
        val = get_irn_n(irn, 2);
        if (mode_is_float(get_irn_mode(val)))
                return;
 
+       /* do not create push if the IncSP does not expand the stack or the expand size differs from the register size */
        if (be_get_IncSP_direction(sp) != be_stack_dir_expand ||
                be_get_IncSP_offset(sp) != get_mode_size_bytes(ia32_reg_classes[CLASS_ia32_gp].mode))
                return;
 
+       /* do not create push if there is a path (inside the block) from the push value to the IncSP */
+       h = heights_new(cg->irg);
+       if (get_nodes_block(val) == get_nodes_block(sp) &&
+               heights_reachable_in_block(h, val, sp))
+       {
+               heights_free(h);
+               return;
+       }
+       heights_free(h);
+
        /* ok, translate into Push */
-       edge = get_irn_out_edge_first(irn);
+       edge       = get_irn_out_edge_first(irn);
        old_proj_M = get_edge_src_irn(edge);
 
        next = sched_next(irn);
@@ -700,7 +731,7 @@ static int pred_is_specific_nodeblock(const ir_node *bl, const ir_node *pred,
  * return 1 if irn is a candidate, 0 otherwise
  */
 static int is_addr_candidate(const ir_node *block, const ir_node *irn) {
-       ir_node *in, *load, *other, *left, *right;
+       ir_node *in, *left, *right;
        int      n, is_cand = 1;
 
        left  = get_irn_n(irn, 2);
@@ -733,16 +764,17 @@ static int is_addr_candidate(const ir_node *block, const ir_node *irn) {
  * - the load must not have other users than the irn             AND
  * - the irn must not have a frame entity set
  *
+ * @param cg          The ia32 code generator
  * @param h           The height information of the irg
 * @param block       The block the Loads must/must not be in
  * @param irn         The irn to check
- * return 1 if irn is a candidate, 0 otherwise
+ * return 0 if irn is not a candidate, 1 if the left Load can be used, 2 if the right one, 3 for both
  */
-static int is_am_candidate(heights_t *h, const ir_node *block, ir_node *irn) {
+static ia32_am_cand_t is_am_candidate(ia32_code_gen_t *cg, heights_t *h, const ir_node *block, ir_node *irn) {
        ir_node *in, *load, *other, *left, *right;
-       int      n, is_cand = 0;
+       int      n, is_cand = 0, cand;
 
-       if (is_ia32_Ld(irn) || is_ia32_St(irn) || is_ia32_Store8Bit(irn))
+       if (is_ia32_Ld(irn) || is_ia32_St(irn) || is_ia32_Store8Bit(irn) || is_ia32_vfild(irn) || is_ia32_vfist(irn))
                return 0;
 
        left  = get_irn_n(irn, 2);
@@ -758,11 +790,15 @@ static int is_am_candidate(heights_t *h, const ir_node *block, ir_node *irn) {
                other = right;
 
                /* If there is a data dependency of other irn from load: cannot use AM */
-               if (get_nodes_block(other) == block)
-                       is_cand = heights_reachable_in_block(h, load, other) ? 0 : is_cand;
+               if (get_nodes_block(other) == block) {
+                       other   = skip_Proj(other);
+                       is_cand = heights_reachable_in_block(h, other, load) ? 0 : is_cand;
+               }
        }
 
-       in = right;
+       cand    = is_cand ? IA32_AM_CAND_LEFT : IA32_AM_CAND_NONE;
+       in      = right;
+       is_cand = 0;
 
        if (pred_is_specific_nodeblock(block, in, is_ia32_Ld)) {
                n         = ia32_get_irn_n_edges(in);
@@ -772,13 +808,28 @@ static int is_am_candidate(heights_t *h, const ir_node *block, ir_node *irn) {
                other = left;
 
                /* If there is a data dependency of other irn from load: cannot use load */
-               if (get_nodes_block(other) == block)
-                       is_cand = heights_reachable_in_block(h, load, other) ? 0 : is_cand;
+               if (get_nodes_block(other) == block) {
+                       other   = skip_Proj(other);
+                       is_cand = heights_reachable_in_block(h, other, load) ? 0 : is_cand;
+               }
        }
 
-       is_cand = get_ia32_frame_ent(irn) ? 0 : is_cand;
+       cand = is_cand ? (cand | IA32_AM_CAND_RIGHT) : cand;
 
-       return is_cand;
+       /* check some special cases */
+       if (USE_SSE2(cg) && is_ia32_Conv_I2FP(irn)) {
+               /* SSE Conv I -> FP cvtsi2s(s|d) can only load 32-bit values */
+               if (get_mode_size_bits(get_ia32_tgt_mode(irn)) != 32)
+                       cand = IA32_AM_CAND_NONE;
+       }
+       else if (is_ia32_Conv_I2I(irn)) {
+               /* we cannot load an N-bit value and implicitly convert it into an M-bit value if N > M */
+               if (get_mode_size_bits(get_ia32_src_mode(irn)) > get_mode_size_bits(get_ia32_tgt_mode(irn)))
+                       cand = IA32_AM_CAND_NONE;
+       }
+
+       /* if the irn has a frame entity, we do not use address mode */
+       return get_ia32_frame_ent(irn) ? IA32_AM_CAND_NONE : cand;
 }
 
 /**
@@ -1007,12 +1058,7 @@ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn, ir_node *noreg) {
        }
 
        /* determine the operand which needs to be checked */
-       if (be_is_NoReg(cg, right)) {
-               temp = left;
-       }
-       else {
-               temp = right;
-       }
+       temp = be_is_NoReg(cg, right) ? left : right;
 
        /* check if right operand is AMConst (LEA with ia32_am_O)  */
        /* but we can only eat it up if there is no other symconst */
@@ -1026,6 +1072,9 @@ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn, ir_node *noreg) {
                have_am_sc = 1;
                dolea      = 1;
                lea_o      = temp;
+
+               if (temp == base)
+                       base = noreg;
        }
 
        if (isadd) {
@@ -1071,7 +1120,7 @@ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn, ir_node *noreg) {
                                /* index != right -> we found a good Shl           */
                                /* left  != LEA   -> this Shl was the left operand */
                                /* -> base is right operand                        */
-                               base = right;
+                               base = (right == lea_o) ? noreg : right;
                        }
                }
        }
@@ -1165,7 +1214,7 @@ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn, ir_node *noreg) {
 
                am_flav = ia32_am_N;
                /* determine new am flavour */
-               if (offs || offs_cnst || offs_lea) {
+               if (offs || offs_cnst || offs_lea || have_am_sc) {
                        am_flav |= ia32_O;
                }
                if (! be_is_NoReg(cg, base)) {
@@ -1279,15 +1328,13 @@ static void exchange_left_right(ir_node *irn, ir_node **left, ir_node **right, i
 
 /**
  * Performs address calculation optimization (create LEAs if possible)
- * @return 1 if performed optimization, 0 otherwise
  */
-static int optimize_address_calculation(ir_node *irn, void *env) {
+static void optimize_lea(ir_node *irn, void *env) {
        ia32_code_gen_t *cg  = env;
-       int              ret = 0;
        ir_node         *block, *noreg_gp, *left, *right;
 
        if (! is_ia32_irn(irn))
-               return ret;
+               return;
 
        /* Following cases can occur:                                  */
        /* - Sub (l, imm) -> LEA [base - offset]                       */
@@ -1311,9 +1358,8 @@ static int optimize_address_calculation(ir_node *irn, void *env) {
 
                        DBG((cg->mod, LEVEL_1, "\tfound address calculation candidate %+F ... ", irn));
                        res = fold_addr(cg, irn, noreg_gp);
-                       ret = (res != irn);
 
-                       if (ret)
+                       if (res != irn)
                                DB((cg->mod, LEVEL_1, "transformed into %+F\n", res));
                        else
                                DB((cg->mod, LEVEL_1, "not transformed\n"));
@@ -1334,14 +1380,13 @@ static int optimize_address_calculation(ir_node *irn, void *env) {
 
                                if (src && (is_ia32_Ld(src) || is_ia32_St(src) || is_ia32_Store8Bit(src))) {
                                        DBG((cg->mod, LEVEL_1, "\nmerging %+F into %+F\n", left, irn));
-                                       merge_loadstore_lea(src, left);
-                                       ret = 1;
+                                       if (! is_ia32_got_lea(src))
+                                               merge_loadstore_lea(src, left);
+                                       set_ia32_got_lea(src);
                                }
                        }
                }
        }
-
-       return ret;
 }
 
 
@@ -1355,7 +1400,7 @@ static void optimize_am(ir_node *irn, void *env) {
        ia32_code_gen_t   *cg         = am_opt_env->cg;
        heights_t         *h          = am_opt_env->h;
        ir_node           *block, *noreg_gp, *noreg_fp;
-       ir_node           *left, *right, *temp;
+       ir_node           *left, *right;
        ir_node           *store, *load, *mem_proj;
        ir_node           *succ, *addr_b, *addr_i;
        int               check_am_src          = 0;
@@ -1385,7 +1430,15 @@ static void optimize_am(ir_node *irn, void *env) {
        /*     - the Load and Store are in the same block AND                               */
        /*     - nobody else uses the result of the op                                      */
 
-       if ((get_ia32_am_support(irn) != ia32_am_None) && ! is_ia32_Lea(irn) && is_am_candidate(h, block, irn)) {
+       if ((get_ia32_am_support(irn) != ia32_am_None) && ! is_ia32_Lea(irn)) {
+               ia32_am_cand_t cand      = is_am_candidate(cg, h, block, irn);
+               ia32_am_cand_t orig_cand = cand;
+
+               /* cand == 1: load is left;   cand == 2: load is right; */
+
+               if (cand == IA32_AM_CAND_NONE)
+                       return;
+
                DBG((mod, LEVEL_1, "\tfound address mode candidate %+F ... ", irn));
 
                left  = get_irn_n(irn, 2);
@@ -1398,25 +1451,25 @@ static void optimize_am(ir_node *irn, void *env) {
                }
 
                /* normalize commutative ops */
-               if (node_is_ia32_comm(irn)) {
+               if (node_is_ia32_comm(irn) && (cand == IA32_AM_CAND_LEFT)) {
+
                        /* Assure that right operand is always a Load if there is one    */
                        /* because non-commutative ops can only use Dest AM if the right */
                        /* operand is a load, so we only need to check right operand.    */
-                       if (pred_is_specific_nodeblock(block, left, is_ia32_Ld))
-                       {
-                               exchange_left_right(irn, &left, &right, 3, 2);
-                               need_exchange_on_fail = 1;
-                       }
+
+                       exchange_left_right(irn, &left, &right, 3, 2);
+                       need_exchange_on_fail = 1;
+
+                       /* now: load is right */
+                       cand = IA32_AM_CAND_RIGHT;
                }
 
                /* check for Store -> op -> Load */
 
                /* Store -> op -> Load optimization is only possible if supported by op */
                /* and if right operand is a Load                                       */
-               if ((get_ia32_am_support(irn) & ia32_am_Dest) &&
-                        pred_is_specific_nodeblock(block, right, is_ia32_Ld))
+               if ((get_ia32_am_support(irn) & ia32_am_Dest) && (cand & IA32_AM_CAND_RIGHT))
                {
-
                        /* An address mode capable op always has a result Proj.                  */
                        /* If this Proj is used by more than one other node, we don't need to    */
                        /* check further, otherwise we check for Store and remember the address, */
@@ -1445,24 +1498,14 @@ static void optimize_am(ir_node *irn, void *env) {
 
                                /* Extra check for commutative ops with two Loads */
                                /* -> put the interesting Load right              */
-                               if (node_is_ia32_comm(irn) &&
-                                       pred_is_specific_nodeblock(block, left, is_ia32_Ld))
-                               {
+                               if (node_is_ia32_comm(irn) && (cand == IA32_AM_CAND_BOTH)) {
                                        if ((addr_b == get_irn_n(get_Proj_pred(left), 0)) &&
                                                (addr_i == get_irn_n(get_Proj_pred(left), 1)))
                                        {
                                                /* We exchange left and right, so it's easier to kill     */
                                                /* the correct Load later and to handle unary operations. */
-                                               set_irn_n(irn, 2, right);
-                                               set_irn_n(irn, 3, left);
-
-                                               temp  = left;
-                                               left  = right;
-                                               right = temp;
-
-                                               /* this is only needed for Compares, but currently ALL nodes
-                                                * have this attribute :-) */
-                                               set_ia32_pncode(irn, get_inversed_pnc(get_ia32_pncode(irn)));
+                                               exchange_left_right(irn, &left, &right, 3, 2);
+                                               need_exchange_on_fail ^= 1;
                                        }
                                }
 
@@ -1514,6 +1557,8 @@ static void optimize_am(ir_node *irn, void *env) {
                                        DBG_OPT_AM_D(load, store, irn);
 
                                        DB((mod, LEVEL_1, "merged with %+F and %+F into dest AM\n", load, store));
+
+                                       need_exchange_on_fail = 0;
                                }
                        } /* if (store) */
                        else if (get_ia32_am_support(irn) & ia32_am_Source) {
@@ -1527,27 +1572,32 @@ static void optimize_am(ir_node *irn, void *env) {
                }
 
                /* was exchanged but optimize failed: exchange back */
-               if (check_am_src && need_exchange_on_fail)
+               if (need_exchange_on_fail) {
                        exchange_left_right(irn, &left, &right, 3, 2);
+                       cand = orig_cand;
+               }
 
                need_exchange_on_fail = 0;
 
                /* normalize commutative ops */
-               if (check_am_src && node_is_ia32_comm(irn)) {
+               if (check_am_src && node_is_ia32_comm(irn) && (cand == IA32_AM_CAND_RIGHT)) {
+
                        /* Assure that left operand is always a Load if there is one */
                        /* because non-commutative ops can only use Source AM if the */
                        /* left operand is a Load, so we only need to check the left */
                        /* operand afterwards.                                       */
-                       if (pred_is_specific_nodeblock(block, right, is_ia32_Ld))       {
-                               exchange_left_right(irn, &left, &right, 3, 2);
-                               need_exchange_on_fail = 1;
-                       }
+
+                       exchange_left_right(irn, &left, &right, 3, 2);
+                       need_exchange_on_fail = 1;
+
+                       /* now: load is left */
+                       cand = IA32_AM_CAND_LEFT;
                }
 
                /* optimize op -> Load iff Load is only used by this op   */
                /* and left operand is a Load which only used by this irn */
-               if (check_am_src                                        &&
-                       pred_is_specific_nodeblock(block, left, is_ia32_Ld) &&
+               if (check_am_src               &&
+                       (cand & IA32_AM_CAND_LEFT) &&
                        (ia32_get_irn_n_edges(left) == 1))
                {
                        left = get_Proj_pred(left);
@@ -1616,29 +1666,6 @@ static void optimize_am(ir_node *irn, void *env) {
        }
 }
 
-/**
- * This function is called by a walker and performs LEA optimization only.
- * It's a wrapper for optimize_address_calculation because this one returns
- * the transformed irn (or NULL) which gives a type mismatch for walker
- * functions.
- */
-static void optimize_lea(ir_node *irn, void *env) {
-       (void)optimize_address_calculation(irn, env);
-}
-
-/**
- * This function first performs LEA optimization and if this failed
- * it performs address mode optimization.
- */
-static void optimize_all(ir_node *irn, void *env) {
-       ia32_am_opt_env_t *am_opt_env = env;
-
-       if (! optimize_address_calculation(irn, am_opt_env->cg)) {
-               /* irn was not transformed into LEA: check for am */
-               optimize_am(irn, env);
-       }
-}
-
 /**
  * Performs address mode optimization.
  */
@@ -1653,7 +1680,15 @@ void ia32_optimize_addressmode(ia32_code_gen_t *cg) {
                return;
        }
 
-       if ((cg->opt & IA32_OPT_DOAM)) {
+       /* beware: we cannot optimize LEA and AM in one run because */
+       /*         LEA optimization adds new nodes to the irg,      */
+       /*         which invalidates the phase data                 */
+
+       if (cg->opt & IA32_OPT_LEA) {
+               irg_walk_blkwise_graph(cg->irg, NULL, optimize_lea, cg);
+       }
+
+       if (cg->opt & IA32_OPT_DOAM) {
                /* we need height information for am optimization */
                heights_t *h = heights_new(cg->irg);
                ia32_am_opt_env_t env;
@@ -1661,19 +1696,8 @@ void ia32_optimize_addressmode(ia32_code_gen_t *cg) {
                env.cg = cg;
                env.h  = h;
 
-               if (cg->opt & IA32_OPT_LEA) {
-                       /* optimize AM and LEA */
-                       irg_walk_blkwise_graph(cg->irg, NULL, optimize_all, &env);
-               }
-               else {
-                       /* optimize AM only */
-                       irg_walk_blkwise_graph(cg->irg, NULL, optimize_am, &env);
-               }
+               irg_walk_blkwise_graph(cg->irg, NULL, optimize_am, &env);
 
                heights_free(h);
        }
-       else {
-               /* optimize LEA only */
-               irg_walk_blkwise_graph(cg->irg, NULL, optimize_lea, cg);
-       }
 }