X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fia32%2Fia32_optimize.c;h=600fb26f624c380a49fe71cabbe7162c9cbe1be0;hb=5416b9ba504458151bbcf513b914cc48cebbbded;hp=14b80fd4a2d4c70621aebb9ecf943b093576dcff;hpb=7bdbfb6451f4da9ee13c2b1b643b6ac7b58900f2;p=libfirm

diff --git a/ir/be/ia32/ia32_optimize.c b/ir/be/ia32/ia32_optimize.c
index 14b80fd4a..600fb26f6 100644
--- a/ir/be/ia32/ia32_optimize.c
+++ b/ir/be/ia32/ia32_optimize.c
@@ -1,13 +1,28 @@
-/**
- * Project:     libFIRM
- * File name:   ir/be/ia32/ia32_optimize.c
- * Purpose:     Implements several optimizations for IA32
- * Author:      Christian Wuerdig
- * CVS-ID:      $Id$
- * Copyright:   (c) 2006 Universitaet Karlsruhe
- * Licence:     This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
+/*
+ * Copyright (C) 1995-2007 University of Karlsruhe. All rights reserved.
+ *
+ * This file is part of libFirm.
+ *
+ * This file may be distributed and/or modified under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation and appearing in the file LICENSE.GPL included in the
+ * packaging of this file.
+ *
+ * Licensees holding valid libFirm Professional Edition licenses may use
+ * this file in accordance with the libFirm Commercial License
+ * Agreement provided with the Software.
+ *
+ * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
+ * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE.
  */
+/**
+ * @file
+ * @brief       Implements several optimizations for IA32.
+ * @author      Christian Wuerdig
+ * @version     $Id$
+ */
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
@@ -30,11 +45,13 @@
 #include "ia32_new_nodes.h"
 #include "bearch_ia32_t.h"
-#include "gen_ia32_regalloc_if.h"		/* the generated interface (register type and class defenitions) */
+#include "gen_ia32_regalloc_if_t.h"
 #include "ia32_transform.h"
 #include "ia32_dbg_stat.h"
 #include "ia32_util.h"
 
+DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
+
 #define AGGRESSIVE_AM
 
 typedef enum {
@@ -60,9 +77,7 @@ void ia32_pre_transform_phase(ia32_code_gen_t *cg) {
 	   - the psi condition tree transformer needs existing constants to be ia32 constants
 	   - the psi condition tree transformer inserts new firm constants which need to be transformed
 	*/
-	//ia32_transform_all_firm_consts(cg);
 	irg_walk_graph(cg->irg, NULL, ia32_transform_psi_cond_tree, cg);
-	//ia32_transform_all_firm_consts(cg);
 }
 
 /********************************************************************************************************
@@ -80,8 +95,18 @@ void ia32_pre_transform_phase(ia32_code_gen_t *cg) {
 * NOTE: THESE PEEPHOLE OPTIMIZATIONS MUST BE CALLED AFTER SCHEDULING AND REGISTER ALLOCATION.
*/ -static int ia32_cnst_compare(ir_node *n1, ir_node *n2) { - return get_ia32_id_cnst(n1) == get_ia32_id_cnst(n2); +static int ia32_const_equal(const ir_node *n1, const ir_node *n2) { + if(get_ia32_immop_type(n1) != get_ia32_immop_type(n2)) + return 0; + + if(get_ia32_immop_type(n1) == ia32_ImmConst) { + return get_ia32_Immop_tarval(n1) == get_ia32_Immop_tarval(n2); + } else if(get_ia32_immop_type(n1) == ia32_ImmSymConst) { + return get_ia32_Immop_symconst(n1) == get_ia32_Immop_symconst(n2); + } + + assert(get_ia32_immop_type(n1) == ia32_ImmNone); + return 1; } /** @@ -132,10 +157,10 @@ static int is_TestJmp_replacement(ir_node *cand, ir_node *irn) { } } - if (same_args) - return ia32_cnst_compare(cand, irn); + if (!same_args) + return 0; - return 0; + return ia32_const_equal(cand, irn); } /** @@ -149,14 +174,14 @@ static void ia32_optimize_TestJmp(ir_node *irn, ia32_code_gen_t *cg) { replace = cand ? is_TestJmp_replacement(cand, irn) : 0; if (replace) { - DBG((cg->mod, LEVEL_1, "replacing %+F by ", irn)); + DBG((dbg, LEVEL_1, "replacing %+F by ", irn)); if (is_ia32_And(cand)) set_irn_op(irn, op_ia32_CJmpAM); else set_irn_op(irn, op_ia32_CJmp); - DB((cg->mod, LEVEL_1, "%+F\n", irn)); + DB((dbg, LEVEL_1, "%+F\n", irn)); } } @@ -168,20 +193,16 @@ static int is_CondJmp_cand(const ir_node *irn) { * Checks if the arguments of cand are the same of irn. */ static int is_CondJmp_replacement(ir_node *cand, ir_node *irn) { - int i, n = get_irn_arity(cand); - int same_args = 1; + int i, arity; - for (i = 0; i < n; i++) { + arity = get_irn_arity(cand); + for (i = 0; i < arity; i++) { if (get_irn_n(cand, i) != get_irn_n(irn, i)) { - same_args = 0; - break; + return 0; } } - if (same_args) - return ia32_cnst_compare(cand, irn); - - return 0; + return ia32_const_equal(cand, irn); } /** @@ -195,12 +216,12 @@ static void ia32_optimize_CondJmp(ir_node *irn, ia32_code_gen_t *cg) { replace = cand ? 
is_CondJmp_replacement(cand, irn) : 0; if (replace) { - DBG((cg->mod, LEVEL_1, "replacing %+F by ", irn)); + DBG((dbg, LEVEL_1, "replacing %+F by ", irn)); DBG_OPT_CJMP(irn); set_irn_op(irn, op_ia32_CJmpAM); - DB((cg->mod, LEVEL_1, "%+F\n", irn)); + DB((dbg, LEVEL_1, "%+F\n", irn)); } } @@ -235,10 +256,8 @@ static void ia32_create_Pushs(ir_node *irn, ia32_code_gen_t *cg) { * attached to the node */ for(node = sched_next(irn); !sched_is_end(node); node = sched_next(node)) { - const char *am_offs; ir_node *mem; - int offset = -1; - int n; + int offset; int storeslot; // it has to be a store @@ -257,18 +276,7 @@ static void ia32_create_Pushs(ir_node *irn, ia32_code_gen_t *cg) { if( (get_ia32_am_flavour(node) & ia32_am_IS) != 0) break; - am_offs = get_ia32_am_offs(node); - if(am_offs == NULL) { - offset = 0; - } else { - // the am_offs has to be of the form "+NUMBER" - if(sscanf(am_offs, "+%d%n", &offset, &n) != 1 || am_offs[n] != '\0') { - // we shouldn't have any cases in the compiler at the moment - // that produce something different from esp+XX - assert(0); - break; - } - } + offset = get_ia32_am_offs_int(node); storeslot = offset / 4; if(storeslot >= MAXPUSH_OPTIMIZE) @@ -296,7 +304,7 @@ static void ia32_create_Pushs(ir_node *irn, ia32_code_gen_t *cg) { for( ; i >= 0; --i) { const arch_register_t *spreg; ir_node *push; - ir_node *val, *mem; + ir_node *val, *mem, *mem_proj; ir_node *store = stores[i]; ir_node *noreg = ia32_new_NoReg_gp(cg); @@ -309,9 +317,10 @@ static void ia32_create_Pushs(ir_node *irn, ia32_code_gen_t *cg) { // create a push push = new_rd_ia32_Push(NULL, irg, block, noreg, noreg, val, curr_sp, mem); - if(get_ia32_immop_type(store) != ia32_ImmNone) { - copy_ia32_Immop_attr(push, store); - } + + set_ia32_am_support(push, ia32_am_Source); + copy_ia32_Immop_attr(push, store); + sched_add_before(irn, push); // create stackpointer proj @@ -319,14 +328,14 @@ static void ia32_create_Pushs(ir_node *irn, ia32_code_gen_t *cg) { arch_set_irn_register(cg->arch_env, curr_sp, spreg); sched_add_before(irn, curr_sp); - // rewire users - edges_reroute(store, push, irg); + // create memory proj + mem_proj = new_r_Proj(irg, block, push, mode_M, pn_ia32_Push_M); + sched_add_before(irn, mem_proj); + + // use the memproj now + exchange(store, mem_proj); // we can remove the store now - set_irn_n(store, 0, new_Bad()); - set_irn_n(store, 1, new_Bad()); - set_irn_n(store, 2, new_Bad()); - set_irn_n(store, 3, new_Bad()); sched_remove(store); offset -= 4; @@ -352,6 +361,7 @@ static void ia32_create_Pushs(ir_node *irn, ia32_code_gen_t *cg) { } } +#if 0 /** * Tries to optimize two following IncSP. */ @@ -373,6 +383,7 @@ static void ia32_optimize_IncSP(ir_node *irn, ia32_code_gen_t *cg) { sched_remove(prev); } } +#endif /** * Performs Peephole Optimizations. @@ -391,7 +402,7 @@ static void ia32_peephole_optimize_node(ir_node *irn, void *env) { if (be_is_IncSP(irn)) { // optimize_IncSP doesn't respect dependency edges yet... //ia32_optimize_IncSP(irn, cg); - (void) ia32_optimize_IncSP; + if (cg->opt & IA32_OPT_PUSHARGS) ia32_create_Pushs(irn, cg); } @@ -525,6 +536,9 @@ static ia32_am_cand_t is_am_candidate(ia32_code_gen_t *cg, heights_t *h, const i is_ia32_GetST0(irn) || is_ia32_SetST0(irn) || is_ia32_xStoreSimple(irn)) return 0; + if(get_ia32_frame_ent(irn) != NULL) + return IA32_AM_CAND_NONE; + left = get_irn_n(irn, 2); arity = get_irn_arity(irn); assert(arity == 5 || arity == 4); @@ -598,7 +612,7 @@ static ia32_am_cand_t is_am_candidate(ia32_code_gen_t *cg, heights_t *h, const i cand = is_cand ? 
(cand | IA32_AM_CAND_RIGHT) : cand; /* if the irn has a frame entity: we do not use address mode */ - return get_ia32_frame_ent(irn) ? IA32_AM_CAND_NONE : cand; + return cand; } /** @@ -608,30 +622,24 @@ static ia32_am_cand_t is_am_candidate(ia32_code_gen_t *cg, heights_t *h, const i static int load_store_addr_is_equal(const ir_node *load, const ir_node *store, const ir_node *addr_b, const ir_node *addr_i) { - int is_equal = (addr_b == get_irn_n(load, 0)) && (addr_i == get_irn_n(load, 1)); - ir_entity *lent = get_ia32_frame_ent(load); - ir_entity *sent = get_ia32_frame_ent(store); - ident *lid = get_ia32_am_sc(load); - ident *sid = get_ia32_am_sc(store); - char *loffs = get_ia32_am_offs(load); - char *soffs = get_ia32_am_offs(store); - - /* are both entities set and equal? */ - if (is_equal && (lent || sent)) - is_equal = lent && sent && (lent == sent); - - /* are address mode idents set and equal? */ - if (is_equal && (lid || sid)) - is_equal = lid && sid && (lid == sid); - - /* are offsets set and equal */ - if (is_equal && (loffs || soffs)) - is_equal = loffs && soffs && strcmp(loffs, soffs) == 0; - - /* are the load and the store of the same mode? */ - is_equal = is_equal ? get_ia32_ls_mode(load) == get_ia32_ls_mode(store) : 0; - - return is_equal; + if(get_irn_n(load, 0) != addr_b) + return 0; + if(get_irn_n(load, 1) != addr_i) + return 0; + + if(get_ia32_frame_ent(load) != get_ia32_frame_ent(store)) + return 0; + + if(get_ia32_am_sc(load) != get_ia32_am_sc(store)) + return 0; + if(is_ia32_am_sc_sign(load) != is_ia32_am_sc_sign(store)) + return 0; + if(get_ia32_am_offs_int(load) != get_ia32_am_offs_int(store)) + return 0; + if(get_ia32_ls_mode(load) != get_ia32_ls_mode(store)) + return 0; + + return 1; } typedef enum _ia32_take_lea_attr { @@ -758,8 +766,8 @@ static INLINE void try_remove_from_sched(ir_node *node) { int i, arity; if(get_irn_mode(node) == mode_T) { - const ir_edge_t *edge; - foreach_out_edge(node, edge) { + const ir_edge_t *edge, *next; + foreach_out_edge_safe(node, edge, next) { ir_node *proj = get_edge_src_irn(edge); try_remove_from_sched(proj); } @@ -783,7 +791,7 @@ static INLINE void try_remove_from_sched(ir_node *node) { */ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn) { ir_graph *irg = get_irn_irg(irn); - dbg_info *dbg = get_irn_dbg_info(irn); + dbg_info *dbg_info = get_irn_dbg_info(irn); ir_node *block = get_nodes_block(irn); ir_node *res = irn; ir_node *shift = NULL; @@ -797,14 +805,13 @@ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn) { int dolea = 0; int have_am_sc = 0; int am_sc_sign = 0; - ident *am_sc = NULL; + ir_entity *am_sc = NULL; ir_entity *lea_ent = NULL; ir_node *noreg = ia32_new_NoReg_gp(cg); ir_node *left, *right, *temp; ir_node *base, *index; int consumed_left_shift; ia32_am_flavour_t am_flav; - DEBUG_ONLY(firm_dbg_module_t *mod = cg->mod;) if (is_ia32_Add(irn)) isadd = 1; @@ -852,17 +859,17 @@ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn) { if (is_ia32_ImmConst(irn)) { tarval *tv = get_ia32_Immop_tarval(irn); - DBG((mod, LEVEL_1, "\tfound op with imm const")); + DBG((dbg, LEVEL_1, "\tfound op with imm const")); offs_cnst = get_tarval_long(tv); dolea = 1; } else if (isadd && is_ia32_ImmSymConst(irn)) { - DBG((mod, LEVEL_1, "\tfound op with imm symconst")); + DBG((dbg, LEVEL_1, "\tfound op with imm symconst")); have_am_sc = 1; dolea = 1; - am_sc = get_ia32_id_cnst(irn); + am_sc = get_ia32_Immop_symconst(irn); am_sc_sign = is_ia32_am_sc_sign(irn); } @@ -873,7 +880,7 @@ static ir_node 
*fold_addr(ia32_code_gen_t *cg, ir_node *irn) { /* but we can only eat it up if there is no other symconst */ /* because the linker won't accept two symconsts */ if (! have_am_sc && is_ia32_Lea(temp) && get_ia32_am_flavour(temp) == ia32_am_O) { - DBG((mod, LEVEL_1, "\tgot op with LEA am_O")); + DBG((dbg, LEVEL_1, "\tgot op with LEA am_O")); offs_lea = get_ia32_am_offs_int(temp); am_sc = get_ia32_am_sc(temp); @@ -894,7 +901,7 @@ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn) { dolea = 1; consumed_left_shift = -1; - DBG((mod, LEVEL_1, "\tgot LEA candidate with index %+F\n", index)); + DBG((dbg, LEVEL_1, "\tgot LEA candidate with index %+F\n", index)); /* determine the operand which needs to be checked */ temp = left; @@ -906,7 +913,7 @@ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn) { /* check for SHL 1,2,3 */ if (pred_is_specific_node(temp, is_ia32_Shl)) { - if (get_ia32_Immop_tarval(temp)) { + if (is_ia32_ImmConst(temp)) { long shiftval = get_tarval_long(get_ia32_Immop_tarval(temp)); if (shiftval <= 3) { @@ -915,7 +922,7 @@ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn) { shift = temp; scale = shiftval; - DBG((mod, LEVEL_1, "\tgot scaled index %+F\n", index)); + DBG((dbg, LEVEL_1, "\tgot scaled index %+F\n", index)); } } } @@ -939,10 +946,10 @@ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn) { int take_attr = do_new_lea(irn, base, index, left, have_am_sc, cg); if (take_attr == IA32_LEA_ATTR_NONE) { - DBG((mod, LEVEL_1, "\tleave old LEA, creating new one\n")); + DBG((dbg, LEVEL_1, "\tleave old LEA, creating new one\n")); } else { - DBG((mod, LEVEL_1, "\tgot LEA as left operand ... assimilating\n")); + DBG((dbg, LEVEL_1, "\tgot LEA as left operand ... assimilating\n")); lea = left; /* for statistics */ if (take_attr & IA32_LEA_ATTR_OFFS) @@ -970,7 +977,11 @@ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn) { /* ok, we can create a new LEA */ if (dolea) { - res = new_rd_ia32_Lea(dbg, irg, block, base, index); + res = new_rd_ia32_Lea(dbg_info, irg, block, base, index); + /* we don't want stuff before the barrier... 
*/ + if(be_is_NoReg(cg, base) && be_is_NoReg(cg, index)) { + add_irn_dep(res, get_irg_frame(irg)); + } /* add the old offset of a previous LEA */ add_ia32_am_offs_int(res, offs); @@ -1030,7 +1041,16 @@ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn) { SET_IA32_ORIG_NODE(res, ia32_get_old_node_name(cg, irn)); - DBG((mod, LEVEL_1, "\tLEA [%+F + %+F * %d + %s]\n", base, index, scale, get_ia32_am_offs(res))); + DBG((dbg, LEVEL_1, "\tLEA [%+F + %+F * %d + %d]\n", base, index, scale, get_ia32_am_offs_int(res))); + + assert(irn && "Couldn't find result proj"); + + /* get the result Proj of the Add/Sub */ + try_add_to_sched(irn, res); + + /* exchange the old op with the new LEA */ + try_remove_from_sched(irn); + exchange(irn, res); /* we will exchange it, report here before the Proj is created */ if (shift && lea && lea_o) { @@ -1038,45 +1058,30 @@ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn) { try_remove_from_sched(lea); try_remove_from_sched(lea_o); DBG_OPT_LEA4(irn, lea_o, lea, shift, res); - } - else if (shift && lea) { + } else if (shift && lea) { try_remove_from_sched(shift); try_remove_from_sched(lea); DBG_OPT_LEA3(irn, lea, shift, res); - } - else if (shift && lea_o) { + } else if (shift && lea_o) { try_remove_from_sched(shift); try_remove_from_sched(lea_o); DBG_OPT_LEA3(irn, lea_o, shift, res); - } - else if (lea && lea_o) { + } else if (lea && lea_o) { try_remove_from_sched(lea); try_remove_from_sched(lea_o); DBG_OPT_LEA3(irn, lea_o, lea, res); - } - else if (shift) { + } else if (shift) { try_remove_from_sched(shift); DBG_OPT_LEA2(irn, shift, res); - } - else if (lea) { + } else if (lea) { try_remove_from_sched(lea); DBG_OPT_LEA2(irn, lea, res); - } - else if (lea_o) { + } else if (lea_o) { try_remove_from_sched(lea_o); DBG_OPT_LEA2(irn, lea_o, res); - } - else + } else { DBG_OPT_LEA1(irn, res); - - /* get the result Proj of the Add/Sub */ - try_add_to_sched(irn, res); - try_remove_from_sched(irn); - - assert(irn && "Couldn't find result proj"); - - /* exchange the old op with the new LEA */ - exchange(irn, res); + } } return res; @@ -1149,9 +1154,7 @@ static void exchange_left_right(ir_node *irn, ir_node **left, ir_node **right, i /** * Performs address calculation optimization (create LEAs if possible) */ -static void optimize_lea(ir_node *irn, void *env) { - ia32_code_gen_t *cg = env; - +static void optimize_lea(ia32_code_gen_t *cg, ir_node *irn) { if (! is_ia32_irn(irn)) return; @@ -1169,13 +1172,13 @@ static void optimize_lea(ir_node *irn, void *env) { if(!is_addr_candidate(irn)) return; - DBG((cg->mod, LEVEL_1, "\tfound address calculation candidate %+F ... ", irn)); + DBG((dbg, LEVEL_1, "\tfound address calculation candidate %+F ... ", irn)); res = fold_addr(cg, irn); if (res != irn) - DB((cg->mod, LEVEL_1, "transformed into %+F\n", res)); + DB((dbg, LEVEL_1, "transformed into %+F\n", res)); else - DB((cg->mod, LEVEL_1, "not transformed\n")); + DB((dbg, LEVEL_1, "not transformed\n")); } else if (is_ia32_Ld(irn) || is_ia32_St(irn) || is_ia32_Store8Bit(irn)) { /* - Load -> LEA into Load } TODO: If the LEA is used by more than one Load/Store */ /* - Store -> LEA into Store } it might be better to keep the LEA */ @@ -1190,7 +1193,7 @@ static void optimize_lea(ir_node *irn, void *env) { src = get_edge_src_irn(edge); if (src && (get_edge_src_pos(edge) == 0) && (is_ia32_Ld(src) || is_ia32_St(src) || is_ia32_Store8Bit(src))) { - DBG((cg->mod, LEVEL_1, "\nmerging %+F into %+F\n", left, irn)); + DBG((dbg, LEVEL_1, "\nmerging %+F into %+F\n", left, irn)); if (! 
is_ia32_got_lea(src))
 					merge_loadstore_lea(src, left);
 				set_ia32_got_lea(src);
@@ -1200,6 +1203,94 @@ static void optimize_lea(ir_node *irn, void *env) {
 		}
 	}
 }
 
+static void optimize_conv_store(ia32_code_gen_t *cg, ir_node *node)
+{
+	ir_node *pred;
+	ir_mode *conv_mode;
+	ir_mode *store_mode;
+
+	if(!is_ia32_Store(node) && !is_ia32_Store8Bit(node))
+		return;
+
+	pred = get_irn_n(node, 2);
+	if(!is_ia32_Conv_I2I(pred) && !is_ia32_Conv_I2I8Bit(pred))
+		return;
+
+	/* the store only stores the lower bits, so we only need the conv
+	 * if it shrinks the mode */
+	conv_mode = get_ia32_ls_mode(pred);
+	store_mode = get_ia32_ls_mode(node);
+	if(get_mode_size_bits(conv_mode) < get_mode_size_bits(store_mode))
+		return;
+
+	set_irn_n(node, 2, get_irn_n(pred, 2));
+	if(get_irn_n_edges(pred) == 0) {
+		be_kill_node(pred);
+	}
+}
+
+static void optimize_load_conv(ia32_code_gen_t *cg, ir_node *node)
+{
+	ir_node *pred, *predpred;
+	ir_mode *load_mode;
+	ir_mode *conv_mode;
+
+	if (!is_ia32_Conv_I2I(node) && !is_ia32_Conv_I2I8Bit(node))
+		return;
+
+	pred = get_irn_n(node, 2);
+	if(!is_Proj(pred))
+		return;
+
+	predpred = get_Proj_pred(pred);
+	if(!is_ia32_Load(predpred))
+		return;
+
+	/* the load is sign extending the upper bits, so we only need the conv
+	 * if it shrinks the mode */
+	load_mode = get_ia32_ls_mode(predpred);
+	conv_mode = get_ia32_ls_mode(node);
+	if(get_mode_size_bits(conv_mode) < get_mode_size_bits(load_mode))
+		return;
+
+	/* kill the conv */
+	exchange(node, pred);
+}
+
+static void optimize_conv_conv(ia32_code_gen_t *cg, ir_node *node)
+{
+	ir_node *pred;
+	ir_mode *pred_mode;
+	ir_mode *conv_mode;
+
+	if (!is_ia32_Conv_I2I(node) && !is_ia32_Conv_I2I8Bit(node))
+		return;
+
+	pred = get_irn_n(node, 2);
+	if(!is_ia32_Conv_I2I(pred) && !is_ia32_Conv_I2I8Bit(pred))
+		return;
+
+	/* we know that after a conv, the upper bits are sign extended
+	 * so we only need the 2nd conv if it shrinks the mode */
+	conv_mode = get_ia32_ls_mode(node);
+	pred_mode = get_ia32_ls_mode(pred);
+	if(get_mode_size_bits(conv_mode) < get_mode_size_bits(pred_mode))
+		return;
+
+	/* kill the conv */
+	exchange(node, pred);
+}
+
+static void optimize_node(ir_node *node, void *env)
+{
+	ia32_code_gen_t *cg = env;
+
+	optimize_load_conv(cg, node);
+	optimize_conv_store(cg, node);
+	optimize_conv_conv(cg, node);
+	optimize_lea(cg, node);
+}
+
 /**
  * Checks for address mode patterns and performs the
  * necessary transformations.
@@ -1219,7 +1310,17 @@ static void optimize_am(ir_node *irn, void *env) {
 	ia32_am_cand_t orig_cand;
 	int dest_possible;
 	int source_possible;
-	DEBUG_ONLY(firm_dbg_module_t *mod = cg->mod;)
+
+	static const arch_register_req_t dest_out_reg_req_0 = {
+		arch_register_req_type_none,
+		NULL,        /* regclass */
+		NULL,        /* limit bitset */
+		-1,          /* same pos */
+		-1           /* different pos */
+	};
+	static const arch_register_req_t *dest_am_out_reqs[] = {
+		&dest_out_reg_req_0
+	};
 
 	if (!is_ia32_irn(irn) || is_ia32_Ld(irn) || is_ia32_St(irn) || is_ia32_Store8Bit(irn))
 		return;
@@ -1229,8 +1330,6 @@ static void optimize_am(ir_node *irn, void *env) {
 	am_support = get_ia32_am_support(irn);
 	block = get_nodes_block(irn);
 
-	DBG((mod, LEVEL_1, "checking for AM\n"));
-
 	/* fold following patterns: */
 	/* - op -> Load into AMop with am_Source */
 	/*   conditions:                */
@@ -1252,7 +1351,8 @@ static void optimize_am(ir_node *irn, void *env) {
 		return;
 	orig_cand = cand;
 
-	DBG((mod, LEVEL_1, "\tfound address mode candidate %+F ... ", irn));
+	DBG((dbg, LEVEL_1, "\tfound address mode candidate %+F (candleft %d candright %d)... 
\n", irn,
+			cand & IA32_AM_CAND_LEFT, cand & IA32_AM_CAND_RIGHT));
 
 	left = get_irn_n(irn, 2);
 	if (get_irn_arity(irn) == 4) {
@@ -1266,6 +1366,8 @@ static void optimize_am(ir_node *irn, void *env) {
 	dest_possible = am_support & ia32_am_Dest ? 1 : 0;
 	source_possible = am_support & ia32_am_Source ? 1 : 0;
 
+	DBG((dbg, LEVEL_2, "\tdest_possible %d source_possible %d ... \n", dest_possible, source_possible));
+
 	if (dest_possible) {
 		addr_b = NULL;
 		addr_i = NULL;
@@ -1283,6 +1385,7 @@ static void optimize_am(ir_node *irn, void *env) {
 		}
 
 		if (store == NULL) {
+			DBG((dbg, LEVEL_2, "\tno store found, not using dest_mode\n"));
 			dest_possible = 0;
 		}
 	}
@@ -1291,7 +1394,9 @@ static void optimize_am(ir_node *irn, void *env) {
 		/* normalize nodes, we need the interesting load on the left side */
 		if (cand & IA32_AM_CAND_RIGHT) {
 			load = get_Proj_pred(right);
-			if (load_store_addr_is_equal(load, store, addr_b, addr_i)) {
+			if (load_store_addr_is_equal(load, store, addr_b, addr_i)
+					&& node_is_ia32_comm(irn)) {
+				DBG((dbg, LEVEL_2, "\texchanging left/right\n"));
 				exchange_left_right(irn, &left, &right, 3, 2);
 				need_exchange_on_fail ^= 1;
 				if (cand == IA32_AM_CAND_RIGHT)
@@ -1307,10 +1412,12 @@ static void optimize_am(ir_node *irn, void *env) {
 #ifndef AGGRESSIVE_AM
 		/* we have to be the only user of the load */
 		if (get_irn_n_edges(left) > 1) {
+			DBG((dbg, LEVEL_2, "\tmatching load has too many users, not using dest_mode\n"));
 			dest_possible = 0;
 		}
 #endif
 		} else {
+			DBG((dbg, LEVEL_2, "\tno matching load found, not using dest_mode\n"));
 			dest_possible = 0;
 		}
 	}
 
@@ -1322,16 +1429,26 @@ static void optimize_am(ir_node *irn, void *env) {
 	if (dest_possible) {
 		ir_node *loadmem = get_irn_n(load, 2);
 		ir_node *storemem = get_irn_n(store, 3);
 		assert(get_irn_mode(loadmem) == mode_M);
 		assert(get_irn_mode(storemem) == mode_M);
-		if(storemem != loadmem || !is_Proj(storemem)
-				|| get_Proj_pred(storemem) != load) {
+		/* TODO there could be a sync between store and load... 
*/
+		if(storemem != loadmem && (!is_Proj(storemem) || get_Proj_pred(storemem) != load)) {
+			DBG((dbg, LEVEL_2, "\tload/store using different memories, not using dest_mode\n"));
 			dest_possible = 0;
 		}
 	}
 
 	if (dest_possible) {
 		/* Compare Load and Store address */
-		if (!load_store_addr_is_equal(load, store, addr_b, addr_i))
+		if (!load_store_addr_is_equal(load, store, addr_b, addr_i)) {
+			DBG((dbg, LEVEL_2, "\taddresses not equal, not using dest_mode\n"));
 			dest_possible = 0;
+		}
+	}
+
+	if (dest_possible) {
+		ir_mode *lsmode = get_ia32_ls_mode(load);
+		if(get_mode_size_bits(lsmode) != 32) {
+			dest_possible = 0;
+		}
 	}
 
 	if (dest_possible) {
@@ -1346,17 +1463,12 @@ static void optimize_am(ir_node *irn, void *env) {
 		set_ia32_am_flavour(irn, get_ia32_am_flavour(load));
 		set_ia32_op_type(irn, ia32_AddrModeD);
 		set_ia32_frame_ent(irn, get_ia32_frame_ent(load));
-		if(is_ia32_use_frame(load))
-			set_ia32_use_frame(irn);
 		set_ia32_ls_mode(irn, get_ia32_ls_mode(load));
 
 		set_ia32_am_sc(irn, get_ia32_am_sc(load));
 		if (is_ia32_am_sc_sign(load))
 			set_ia32_am_sc_sign(irn);
 
-		if (is_ia32_use_frame(load))
-			set_ia32_use_frame(irn);
-
 		/* connect to Load memory and disconnect Load */
 		if (get_irn_arity(irn) == 5) {
 			/* binary AMop */
@@ -1368,20 +1480,21 @@ static void optimize_am(ir_node *irn, void *env) {
 			set_irn_n(irn, 2, ia32_get_admissible_noreg(cg, irn, 2));
 		}
 
+		/* change node mode and out register requirements */
 		set_irn_mode(irn, mode_M);
+		set_ia32_out_req_all(irn, dest_am_out_reqs);
 
 		/* connect the memory Proj of the Store to the op */
-		mem_proj = ia32_get_proj_for_mode(store, mode_M);
-		edges_reroute(mem_proj, irn, irg);
+		edges_reroute(store, irn, irg);
 
 		/* clear remat flag */
 		set_ia32_flags(irn, get_ia32_flags(irn) & ~arch_irn_flags_rematerializable);
 
-		try_remove_from_sched(load);
 		try_remove_from_sched(store);
+		try_remove_from_sched(load);
 		DBG_OPT_AM_D(load, store, irn);
 
-		DB((mod, LEVEL_1, "merged with %+F and %+F into dest AM\n", load, store));
+		DB((dbg, LEVEL_1, "merged with %+F and %+F into dest AM\n", load, store));
 		need_exchange_on_fail = 0;
 		source_possible = 0;
 	}
@@ -1409,6 +1522,13 @@ static void optimize_am(ir_node *irn, void *env) {
 		}
 	}
 
+	if (source_possible) {
+		ir_mode *ls_mode = get_ia32_ls_mode(load);
+		if(get_mode_size_bits(ls_mode) != 32)
+			source_possible = 0;
+	}
+
 	if (source_possible) {
 		addr_b = get_irn_n(load, 0);
 		addr_i = get_irn_n(load, 1);
@@ -1430,8 +1550,12 @@ static void optimize_am(ir_node *irn, void *env) {
 		/* clear remat flag */
 		set_ia32_flags(irn, get_ia32_flags(irn) & ~arch_irn_flags_rematerializable);
 
-		if (is_ia32_use_frame(load))
+		if (is_ia32_use_frame(load)) {
+			if(get_ia32_frame_ent(load) == NULL) {
+				set_ia32_need_stackent(irn);
+			}
 			set_ia32_use_frame(irn);
+		}
 
 		/* connect to Load memory and disconnect Load */
 		if (get_irn_arity(irn) == 5) {
@@ -1474,7 +1598,7 @@ static void optimize_am(ir_node *irn, void *env) {
 		}
 
 		need_exchange_on_fail = 0;
-		DB((mod, LEVEL_1, "merged with %+F into source AM\n", load));
+		DB((dbg, LEVEL_1, "merged with %+F into source AM\n", load));
 	}
 
 	/* was exchanged but optimize failed: exchange back */
@@ -1484,15 +1608,11 @@ static void optimize_am(ir_node *irn, void *env) {
 }
 
 /**
- * Performs address mode optimization.
+ * Performs conv and address mode optimization.
*/ -void ia32_optimize_addressmode(ia32_code_gen_t *cg) { +void ia32_optimize_graph(ia32_code_gen_t *cg) { /* if we are supposed to do AM or LEA optimization: recalculate edges */ - if (cg->opt & (IA32_OPT_DOAM | IA32_OPT_LEA)) { - edges_deactivate(cg->irg); - edges_activate(cg->irg); - } - else { + if (! (cg->opt & (IA32_OPT_DOAM | IA32_OPT_LEA))) { /* no optimizations at all */ return; } @@ -1502,12 +1622,19 @@ void ia32_optimize_addressmode(ia32_code_gen_t *cg) { /* invalidates the phase data */ if (cg->opt & IA32_OPT_LEA) { - irg_walk_blkwise_graph(cg->irg, NULL, optimize_lea, cg); + irg_walk_blkwise_graph(cg->irg, NULL, optimize_node, cg); } if (cg->dump) be_dump(cg->irg, "-lea", dump_ir_block_graph_sched); + /* hack for now, so these don't get created during optimize, because then + * they will be unknown to the heights module + */ + ia32_new_NoReg_gp(cg); + ia32_new_NoReg_fp(cg); + ia32_new_NoReg_vfp(cg); + if (cg->opt & IA32_OPT_DOAM) { /* we need height information for am optimization */ heights_t *h = heights_new(cg->irg); @@ -1521,3 +1648,8 @@ void ia32_optimize_addressmode(ia32_code_gen_t *cg) { heights_free(h); } } + +void ia32_init_optimize(void) +{ + FIRM_DBG_REGISTER(dbg, "firm.be.ia32.optimize"); +}
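
------------------------------------------------------------------------
Editor's notes: the sketches below illustrate the transformations touched
by this patch. They are explanatory examples only, not part of the patch;
the x86 snippets are schematic and the C fragments are hypothetical.

(1) ia32_optimize_TestJmp / ia32_optimize_CondJmp: when a candidate node
has the same arguments and, per the new ia32_const_equal(), the same
immediate as the current test/compare-and-jump, the second compare is
redundant and the node is rewritten to a CJmp/CJmpAM that reuses the
already computed flags. Schematically (editor's sketch):

    cmp eax, 7        /* first CondJmp                          */
    je  L1
    cmp eax, 7        /* identical candidate...                 */
    jg  L2            /* ...becomes a bare jcc reusing the flags */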
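
(2) ia32_create_Pushs: after an IncSP that enlarges the stack, Stores to
[esp + 4*k] (storeslot k, now read via get_ia32_am_offs_int() instead of
parsing the "+NUMBER" string) are replaced by Push nodes; each Push gets
the Store's Immop attribute copied onto it and a new memory Proj that the
old Store is exchanged against before being removed from the schedule.
Schematically:

    sub  esp, 8             /* IncSP, expand by 8    */
    mov  [esp+4], ebx       /* Store, storeslot 1    */
    mov  [esp],   eax       /* Store, storeslot 0    */

becomes

    push ebx                /* each Push implicitly  */
    push eax                /* moves esp down by 4   */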
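
(3) fold_addr: chains of Add/Sub/Shl feeding an address computation are
folded into a single Lea covering base + index*scale + offset/symconst,
with scale limited to shift values 1, 2 or 3. For example, an address
like a + (i << 2) + 12 can be emitted as one instruction (editor's
sketch):

    lea eax, [ebx + ecx*4 + 12]

The new add_irn_dep(res, get_irg_frame(irg)) call keeps a Lea whose base
and index are both NoReg from being scheduled before the Barrier.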
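
(4) optimize_load_conv / optimize_conv_store / optimize_conv_conv: a
Conv_I2I is dead when its producer or consumer already fixes the bits it
would truncate or extend; it only survives these passes if it shrinks the
mode. A hypothetical source-level illustration of the Conv-before-Store
case:

    void store_byte(int x, char *p)
    {
        /* the Conv_I2I8Bit created for the cast is removed by
         * optimize_conv_store(): the 8-bit Store writes only the
         * low byte of x in any case */
        *p = (char) x;
    }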
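
(5) optimize_am: source address mode (ia32_am_Source) folds a Load into
its user, destination address mode (ia32_am_Dest) folds a matching
Load/Store pair around the operation. Schematically:

    mov eax, [ebx+8]                mov eax, [m]
    add ecx, eax                    add eax, 4
                                    mov [m], eax

become

    add ecx, [ebx+8]                add dword [m], 4
    (ia32_AddrModeS)                (ia32_AddrModeD)

The patch additionally restricts both variants to 32-bit ls_modes, only
swaps left/right operands for commutative nodes (node_is_ia32_comm()),
rejects candidates that carry a frame entity up front, and gives dest-AM
nodes an explicit "no result register" requirement (dest_am_out_reqs)
once their mode is changed to mode_M.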