fix fehler69
[libfirm] / ir / be / ia32 / ia32_optimize.c
index 58b96e9..e5790fb 100644 (file)
@@ -30,6 +30,7 @@
 #include "irnode.h"
 #include "irprog_t.h"
 #include "ircons.h"
+#include "irtools.h"
 #include "firm_types.h"
 #include "iredges.h"
 #include "tv.h"
@@ -52,7 +53,7 @@
 
 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
 
-#define AGGRESSIVE_AM
+//#define AGGRESSIVE_AM
 
 typedef enum {
        IA32_AM_CAND_NONE  = 0,  /**< no addressmode possible with irn inputs */
@@ -176,8 +177,7 @@ static void ia32_create_Pushs(ir_node *irn, ia32_code_gen_t *cg) {
                mem = get_irn_n(store, 3);
                spreg = arch_get_irn_register(cg->arch_env, curr_sp);
 
-               // create a push
-               push = new_rd_ia32_Push(NULL, irg, block, noreg, noreg, val, curr_sp, mem);
+               push = new_rd_ia32_Push(get_irn_dbg_info(store), irg, block, noreg, noreg, val, curr_sp, mem);
 
                set_ia32_am_support(push, ia32_am_Source, ia32_am_unary);
                copy_ia32_Immop_attr(push, store);
@@ -187,9 +187,7 @@ static void ia32_create_Pushs(ir_node *irn, ia32_code_gen_t *cg) {
                // create stackpointer proj
                curr_sp = new_r_Proj(irg, block, push, spmode, pn_ia32_Push_stack);
                arch_set_irn_register(cg->arch_env, curr_sp, spreg);
-#ifdef SCHEDULE_PROJS
-               sched_add_before(irn, curr_sp);
-#endif
+
                // create memory proj
                mem_proj = new_r_Proj(irg, block, push, mode_M, pn_ia32_Push_M);
 
@@ -375,14 +373,14 @@ static int is_addr_candidate(const ir_node *irn)
        right = get_irn_n(irn, 3);
 
        if (pred_is_specific_nodeblock(block, left, is_ia32_Ld)) {
-               n         = ia32_get_irn_n_edges(left);
+               n = ia32_get_irn_n_edges(left);
                /* load with only one user: don't create LEA */
                if(n == 1)
                        return 0;
        }
 
        if (pred_is_specific_nodeblock(block, right, is_ia32_Ld)) {
-               n         = ia32_get_irn_n_edges(right);
+               n = ia32_get_irn_n_edges(right);
                if(n == 1)
                        return 0;
        }
@@ -404,24 +402,26 @@ static int is_addr_candidate(const ir_node *irn)
  * @param h           The height information of the irg
  * @param block       The block the Loads must/mustnot be in
  * @param irn         The irn to check
- * return 0 if irn is no candidate, 1 if left load can be used, 2 if right one, 3 for both
+ * @return 0 if irn is no candidate, 1 if left load can be used, 2 if right one, 3 for both
  */
 static ia32_am_cand_t is_am_candidate(heights_t *h, const ir_node *block, ir_node *irn) {
        ir_node *in, *load, *other, *left, *right;
        int      is_cand = 0, cand;
-       int arity;
+       int      arity;
+       int      is_binary;
 
        if (is_ia32_Ld(irn) || is_ia32_St(irn) ||
                is_ia32_vfild(irn) || is_ia32_vfist(irn) ||
-               is_ia32_GetST0(irn) || is_ia32_SetST0(irn) || is_ia32_xStoreSimple(irn))
+               is_ia32_xStoreSimple(irn))
                return 0;
 
        if(get_ia32_frame_ent(irn) != NULL)
                return IA32_AM_CAND_NONE;
 
-       left  = get_irn_n(irn, 2);
-       arity = get_irn_arity(irn);
-       if(get_ia32_am_arity(irn) == ia32_am_binary) {
+       left      = get_irn_n(irn, 2);
+       arity     = get_irn_arity(irn);
+       is_binary = get_ia32_am_arity(irn) == ia32_am_binary;
+       if(is_binary) {
                /* binary op */
                right = get_irn_n(irn, 3);
        } else {
@@ -452,7 +452,7 @@ static ia32_am_cand_t is_am_candidate(heights_t *h, const ir_node *block, ir_nod
                }
 
                /* If there is a data dependency of other irn from load: cannot use AM */
-               if (is_cand && get_nodes_block(other) == block) {
+               if (is_cand && is_binary && get_nodes_block(other) == block) {
                        other   = skip_Proj(other);
                        is_cand = heights_reachable_in_block(h, other, load) ? 0 : is_cand;
                        /* this could happen in loops */
@@ -477,11 +477,15 @@ static ia32_am_cand_t is_am_candidate(heights_t *h, const ir_node *block, ir_nod
                other = left;
 
                /* 8bit Loads are not supported, they cannot be used with every register */
-               if (get_mode_size_bits(get_ia32_ls_mode(load)) < 16)
+               /* 8bit Loads are not supported (for binary ops),
+                * they cannot be used with every register */
+               if (get_ia32_am_arity(irn) == ia32_am_binary &&
+                               get_mode_size_bits(get_ia32_ls_mode(load)) < 16) {
                        is_cand = 0;
+               }
 
                /* If there is a data dependency of other irn from load: cannot use load */
-               if (is_cand && get_nodes_block(other) == block) {
+               if (is_cand && is_binary && get_nodes_block(other) == block) {
                        other   = skip_Proj(other);
                        is_cand = heights_reachable_in_block(h, other, load) ? 0 : is_cand;
                        /* this could happen in loops */
@@ -642,13 +646,13 @@ static INLINE void try_add_to_sched(ir_node *irn, ir_node *res) {
  * all it's Projs are removed as well.
  * @param irn  The irn to be removed from schedule
  */
-static INLINE void try_remove_from_sched(ir_node *node)
+static INLINE void try_kill(ir_node *node)
 {
        if(get_irn_mode(node) == mode_T) {
                const ir_edge_t *edge, *next;
                foreach_out_edge_safe(node, edge, next) {
                        ir_node *proj = get_edge_src_irn(edge);
-                       try_remove_from_sched(proj);
+                       try_kill(proj);
                }
        }
 
@@ -928,35 +932,35 @@ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn) {
                try_add_to_sched(irn, res);
 
                /* exchange the old op with the new LEA */
-               try_remove_from_sched(irn);
+               try_kill(irn);
                exchange(irn, res);
 
                /* we will exchange it, report here before the Proj is created */
                if (shift && lea && lea_o) {
-                       try_remove_from_sched(shift);
-                       try_remove_from_sched(lea);
-                       try_remove_from_sched(lea_o);
+                       try_kill(shift);
+                       try_kill(lea);
+                       try_kill(lea_o);
                        DBG_OPT_LEA4(irn, lea_o, lea, shift, res);
                } else if (shift && lea) {
-                       try_remove_from_sched(shift);
-                       try_remove_from_sched(lea);
+                       try_kill(shift);
+                       try_kill(lea);
                        DBG_OPT_LEA3(irn, lea, shift, res);
                } else if (shift && lea_o) {
-                       try_remove_from_sched(shift);
-                       try_remove_from_sched(lea_o);
+                       try_kill(shift);
+                       try_kill(lea_o);
                        DBG_OPT_LEA3(irn, lea_o, shift, res);
                } else if (lea && lea_o) {
-                       try_remove_from_sched(lea);
-                       try_remove_from_sched(lea_o);
+                       try_kill(lea);
+                       try_kill(lea_o);
                        DBG_OPT_LEA3(irn, lea_o, lea, res);
                } else if (shift) {
-                       try_remove_from_sched(shift);
+                       try_kill(shift);
                        DBG_OPT_LEA2(irn, shift, res);
                } else if (lea) {
-                       try_remove_from_sched(lea);
+                       try_kill(lea);
                        DBG_OPT_LEA2(irn, lea, res);
                } else if (lea_o) {
-                       try_remove_from_sched(lea_o);
+                       try_kill(lea_o);
                        DBG_OPT_LEA2(irn, lea_o, res);
                } else {
                        DBG_OPT_LEA1(irn, res);
@@ -999,7 +1003,7 @@ static void merge_loadstore_lea(ir_node *irn, ir_node *lea) {
        set_irn_n(irn, 0, get_irn_n(lea, 0));
        set_irn_n(irn, 1, get_irn_n(lea, 1));
 
-       try_remove_from_sched(lea);
+       try_kill(lea);
 
        /* clear remat flag */
        set_ia32_flags(irn, get_ia32_flags(irn) & ~arch_irn_flags_rematerializable);
@@ -1020,6 +1024,8 @@ static void exchange_left_right(ir_node *irn, ir_node **left, ir_node **right,
 {
        ir_node *temp;
 
+       assert(is_ia32_commutative(irn));
+
        set_irn_n(irn, new_right, *right);
        set_irn_n(irn, new_left, *left);
 
@@ -1157,15 +1163,19 @@ static void optimize_load_conv(ir_node *node)
 
 static void optimize_conv_conv(ir_node *node)
 {
-       ir_node *pred;
-       ir_mode *pred_mode;
-       ir_mode *conv_mode;
+       ir_node *pred_proj, *pred, *result_conv;
+       ir_mode *pred_mode, *conv_mode;
 
        if (!is_ia32_Conv_I2I(node) && !is_ia32_Conv_I2I8Bit(node))
                return;
 
        assert(n_ia32_Conv_I2I_val == n_ia32_Conv_I2I8Bit_val);
-       pred = get_irn_n(node, n_ia32_Conv_I2I_val);
+       pred_proj = get_irn_n(node, n_ia32_Conv_I2I_val);
+       if(is_Proj(pred_proj))
+               pred = get_Proj_pred(pred_proj);
+       else
+               pred = pred_proj;
+
        if(!is_ia32_Conv_I2I(pred) && !is_ia32_Conv_I2I8Bit(pred))
                return;
 
@@ -1173,16 +1183,43 @@ static void optimize_conv_conv(ir_node *node)
         * so we only need the 2nd conv if it shrinks the mode */
        conv_mode = get_ia32_ls_mode(node);
        pred_mode = get_ia32_ls_mode(pred);
-       if(get_mode_size_bits(conv_mode) < get_mode_size_bits(pred_mode))
-               return;
+       /* if the 2nd conv is not larger than the first conv, then we can always
+        * take the 2nd conv */
+       if(get_mode_size_bits(conv_mode) <= get_mode_size_bits(pred_mode)) {
+               if(get_irn_n_edges(pred_proj) == 1) {
+                       result_conv = pred_proj;
+                       set_ia32_ls_mode(pred, conv_mode);
+               } else {
+                       /* TODO: construct syncs/stuff here but we'll probably end up with
+                        * 2 statements anyway */
+                       if(get_irn_mode(pred) == mode_T) {
+                               return;
+                       }
 
-       /* we can't eliminate an upconv signed->unsigned  */
-       if (get_mode_size_bits(conv_mode) != get_mode_size_bits(pred_mode) &&
-               !get_mode_sign(conv_mode) && get_mode_sign(pred_mode))
-               return;
+                       result_conv = exact_copy(pred);
+                       set_ia32_ls_mode(result_conv, conv_mode);
+               }
+       } else {
+               /* if both convs have the same sign, then we can take the smaller one */
+               if(get_mode_sign(conv_mode) == get_mode_sign(pred_mode)) {
+                       result_conv = pred_proj;
+               } else {
+                       /* no optimisation possible if smaller conv is sign-extend */
+                       if(mode_is_signed(pred_mode)) {
+                               return;
+                       }
+                       /* we can take the smaller conv if it is unsigned */
+                       result_conv = pred_proj;
+               }
+       }
 
        /* kill the conv */
-       exchange(node, pred);
+       exchange(node, result_conv);
+
+       if(get_irn_n_edges(pred) == 0) {
+               be_kill_node(pred);
+       }
+       optimize_conv_conv(result_conv);
 }
 
 static void optimize_node(ir_node *node, void *env)
@@ -1265,9 +1302,11 @@ static void optimize_am(ir_node *irn, void *env) {
 
        left  = get_irn_n(irn, 2);
        if (am_arity == ia32_am_unary) {
+               assert(get_irn_arity(irn) >= 4);
                right = left;
                assert(cand == IA32_AM_CAND_BOTH);
        } else {
+               assert(get_irn_arity(irn) >= 5);
                right = get_irn_n(irn, 3);
        }
 
@@ -1398,8 +1437,8 @@ static void optimize_am(ir_node *irn, void *env) {
                /* clear remat flag */
                set_ia32_flags(irn, get_ia32_flags(irn) & ~arch_irn_flags_rematerializable);
 
-               try_remove_from_sched(store);
-               try_remove_from_sched(load);
+               try_kill(store);
+               try_kill(load);
                DBG_OPT_AM_D(load, store, irn);
 
                DB((dbg, LEVEL_1, "merged with %+F and %+F into dest AM\n", load, store));
@@ -1425,19 +1464,29 @@ static void optimize_am(ir_node *irn, void *env) {
                assert(cand & IA32_AM_CAND_RIGHT);
                load = get_Proj_pred(right);
 
+               if(get_irn_n_edges(right) > 1) {
+                       source_possible = 0;
+               }
+#if 1
+               /* TODO: this isn't really needed, but the code below is buggy:
+                  heights won't get recomputed when the graph is reconstructed,
+                  so we can only transform loads that have just the result proj */
                if(get_irn_n_edges(load) > 1) {
                        source_possible = 0;
                }
+#endif
        }
 
        if (source_possible) {
                ir_mode *ls_mode = get_ia32_ls_mode(load);
-               if(get_mode_size_bits(ls_mode) != 32)
+               if(get_mode_size_bits(ls_mode) != 32 || ls_mode == mode_D)
                        source_possible = 0;
 
        }
 
        if (source_possible) {
+               const ia32_attr_t *attr_load = get_ia32_attr_const(load);
+               ia32_attr_t       *attr_irn  = get_ia32_attr(irn);
                addr_b = get_irn_n(load, 0);
                addr_i = get_irn_n(load, 1);
 
@@ -1449,7 +1498,16 @@ static void optimize_am(ir_node *irn, void *env) {
                set_ia32_am_flavour(irn, get_ia32_am_flavour(load));
                set_ia32_op_type(irn, ia32_AddrModeS);
                set_ia32_frame_ent(irn, get_ia32_frame_ent(load));
-               set_ia32_ls_mode(irn, get_ia32_ls_mode(load));
+               attr_irn->data.need_64bit_stackent
+                       = attr_load->data.need_64bit_stackent;
+               attr_irn->data.need_32bit_stackent
+                       = attr_load->data.need_32bit_stackent;
+
+               /* set ls_mode if not already present (conv nodes already have ls_mode
+                  set) */
+               if(get_ia32_ls_mode(irn) == NULL) {
+                       set_ia32_ls_mode(irn, get_ia32_ls_mode(load));
+               }
 
                set_ia32_am_sc(irn, get_ia32_am_sc(load));
                if (is_ia32_am_sc_sign(load))
@@ -1468,12 +1526,14 @@ static void optimize_am(ir_node *irn, void *env) {
                /* connect to Load memory and disconnect Load */
                if (am_arity == ia32_am_binary) {
                        /* binary AMop */
-                       set_irn_n(irn, 3, ia32_get_admissible_noreg(cg, irn, 3));
-                       set_irn_n(irn, 4, get_irn_n(load, 2));
+                       right = ia32_get_admissible_noreg(cg, irn, 3);
+                       set_irn_n(irn, 3, right);
+                       set_irn_n(irn, 4, get_irn_n(load, n_ia32_Load_mem));
                } else {
                        /* unary AMop */
-                       set_irn_n(irn, 2, ia32_get_admissible_noreg(cg, irn, 2));
-                       set_irn_n(irn, 3, get_irn_n(load, 2));
+                       right = ia32_get_admissible_noreg(cg, irn, 2);
+                       set_irn_n(irn, 2, right);
+                       set_irn_n(irn, 3, get_irn_n(load, n_ia32_Load_mem));
                }
 
                DBG_OPT_AM_S(load, irn);
@@ -1484,23 +1544,24 @@ static void optimize_am(ir_node *irn, void *env) {
                        ir_node *res_proj;
                        ir_mode *mode = get_irn_mode(irn);
 
-                       res_proj = new_rd_Proj(get_irn_dbg_info(irn), irg,
-                                              get_nodes_block(irn), new_Unknown(mode_T),
-                                              mode, 0);
-                       set_irn_mode(irn, mode_T);
-                       edges_reroute(irn, res_proj, irg);
-                       set_Proj_pred(res_proj, irn);
+                       if(mode != mode_T) {
+                               res_proj = new_rd_Proj(get_irn_dbg_info(irn), irg,
+                                                                          get_nodes_block(irn),
+                                                                          new_Unknown(mode_T), mode, 0);
+                               set_irn_mode(irn, mode_T);
+                               edges_reroute(irn, res_proj, irg);
+                               set_Proj_pred(res_proj, irn);
 
-                       set_Proj_pred(mem_proj, irn);
-                       set_Proj_proj(mem_proj, 1);
-
-                       if(sched_is_scheduled(irn)) {
-                               sched_add_after(irn, res_proj);
-                               sched_add_after(irn, mem_proj);
+                               set_Proj_pred(mem_proj, irn);
+                               set_Proj_proj(mem_proj, 1);
+                       } else {
+                               /* hacky: we need some proj number which is not used yet... */
+                               set_Proj_proj(mem_proj, -1);
+                               set_Proj_pred(mem_proj, irn);
                        }
                }
 
-               try_remove_from_sched(load);
+               try_kill(load);
                need_exchange_on_fail = 0;
 
                /* immediate are only allowed on the right side */