X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fia32%2Fia32_optimize.c;h=6f5c153f8949a9850ceb769a752a377cfb238f01;hb=d5629a5184ffe6a9e43f5bd629a09ff2aea6417a;hp=51336e500ad8ddcbcc37a6fe99a082d7340f9320;hpb=e92520a7d6eefdff288de8efd4ed8724c57f6397;p=libfirm diff --git a/ir/be/ia32/ia32_optimize.c b/ir/be/ia32/ia32_optimize.c index 51336e500..6f5c153f8 100644 --- a/ir/be/ia32/ia32_optimize.c +++ b/ir/be/ia32/ia32_optimize.c @@ -1,14 +1,30 @@ +/* + * Copyright (C) 1995-2007 University of Karlsruhe. All right reserved. + * + * This file is part of libFirm. + * + * This file may be distributed and/or modified under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation and appearing in the file LICENSE.GPL included in the + * packaging of this file. + * + * Licensees holding valid libFirm Professional Edition licenses may use + * this file in accordance with the libFirm Commercial License. + * Agreement provided with the Software. + * + * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE + * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE. + */ + /** - * Project: libFIRM - * File name: ir/be/ia32/ia32_optimize.c - * Purpose: Implements several optimizations for IA32 - * Author: Christian Wuerdig - * CVS-ID: $Id$ - * Copyright: (c) 2006 Universitaet Karlsruhe - * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE. + * @file + * @brief Implements several optimizations for IA32. + * @author Christian Wuerdig + * @version $Id$ */ #ifdef HAVE_CONFIG_H -#include +#include "config.h" #endif #include "irnode.h" @@ -29,11 +45,13 @@ #include "ia32_new_nodes.h" #include "bearch_ia32_t.h" -#include "gen_ia32_regalloc_if.h" /* the generated interface (register type and class defenitions) */ +#include "gen_ia32_regalloc_if_t.h" #include "ia32_transform.h" #include "ia32_dbg_stat.h" #include "ia32_util.h" +DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;) + #define AGGRESSIVE_AM typedef enum { @@ -158,14 +176,14 @@ static void ia32_optimize_TestJmp(ir_node *irn, ia32_code_gen_t *cg) { replace = cand ? is_TestJmp_replacement(cand, irn) : 0; if (replace) { - DBG((cg->mod, LEVEL_1, "replacing %+F by ", irn)); + DBG((dbg, LEVEL_1, "replacing %+F by ", irn)); if (is_ia32_And(cand)) set_irn_op(irn, op_ia32_CJmpAM); else set_irn_op(irn, op_ia32_CJmp); - DB((cg->mod, LEVEL_1, "%+F\n", irn)); + DB((dbg, LEVEL_1, "%+F\n", irn)); } } @@ -200,12 +218,12 @@ static void ia32_optimize_CondJmp(ir_node *irn, ia32_code_gen_t *cg) { replace = cand ? is_CondJmp_replacement(cand, irn) : 0; if (replace) { - DBG((cg->mod, LEVEL_1, "replacing %+F by ", irn)); + DBG((dbg, LEVEL_1, "replacing %+F by ", irn)); DBG_OPT_CJMP(irn); set_irn_op(irn, op_ia32_CJmpAM); - DB((cg->mod, LEVEL_1, "%+F\n", irn)); + DB((dbg, LEVEL_1, "%+F\n", irn)); } } @@ -520,6 +538,9 @@ static ia32_am_cand_t is_am_candidate(ia32_code_gen_t *cg, heights_t *h, const i is_ia32_GetST0(irn) || is_ia32_SetST0(irn) || is_ia32_xStoreSimple(irn)) return 0; + if(get_ia32_frame_ent(irn) != NULL) + return IA32_AM_CAND_NONE; + left = get_irn_n(irn, 2); arity = get_irn_arity(irn); assert(arity == 5 || arity == 4); @@ -593,7 +614,7 @@ static ia32_am_cand_t is_am_candidate(ia32_code_gen_t *cg, heights_t *h, const i cand = is_cand ? (cand | IA32_AM_CAND_RIGHT) : cand; /* if the irn has a frame entity: we do not use address mode */ - return get_ia32_frame_ent(irn) ? 
IA32_AM_CAND_NONE : cand; + return cand; } /** @@ -747,8 +768,8 @@ static INLINE void try_remove_from_sched(ir_node *node) { int i, arity; if(get_irn_mode(node) == mode_T) { - const ir_edge_t *edge; - foreach_out_edge(node, edge) { + const ir_edge_t *edge, *next; + foreach_out_edge_safe(node, edge, next) { ir_node *proj = get_edge_src_irn(edge); try_remove_from_sched(proj); } @@ -772,7 +793,7 @@ static INLINE void try_remove_from_sched(ir_node *node) { */ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn) { ir_graph *irg = get_irn_irg(irn); - dbg_info *dbg = get_irn_dbg_info(irn); + dbg_info *dbg_info = get_irn_dbg_info(irn); ir_node *block = get_nodes_block(irn); ir_node *res = irn; ir_node *shift = NULL; @@ -786,14 +807,13 @@ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn) { int dolea = 0; int have_am_sc = 0; int am_sc_sign = 0; - ident *am_sc = NULL; + ir_entity *am_sc = NULL; ir_entity *lea_ent = NULL; ir_node *noreg = ia32_new_NoReg_gp(cg); ir_node *left, *right, *temp; ir_node *base, *index; int consumed_left_shift; ia32_am_flavour_t am_flav; - DEBUG_ONLY(firm_dbg_module_t *mod = cg->mod;) if (is_ia32_Add(irn)) isadd = 1; @@ -841,13 +861,13 @@ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn) { if (is_ia32_ImmConst(irn)) { tarval *tv = get_ia32_Immop_tarval(irn); - DBG((mod, LEVEL_1, "\tfound op with imm const")); + DBG((dbg, LEVEL_1, "\tfound op with imm const")); offs_cnst = get_tarval_long(tv); dolea = 1; } else if (isadd && is_ia32_ImmSymConst(irn)) { - DBG((mod, LEVEL_1, "\tfound op with imm symconst")); + DBG((dbg, LEVEL_1, "\tfound op with imm symconst")); have_am_sc = 1; dolea = 1; @@ -862,7 +882,7 @@ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn) { /* but we can only eat it up if there is no other symconst */ /* because the linker won't accept two symconsts */ if (! have_am_sc && is_ia32_Lea(temp) && get_ia32_am_flavour(temp) == ia32_am_O) { - DBG((mod, LEVEL_1, "\tgot op with LEA am_O")); + DBG((dbg, LEVEL_1, "\tgot op with LEA am_O")); offs_lea = get_ia32_am_offs_int(temp); am_sc = get_ia32_am_sc(temp); @@ -883,7 +903,7 @@ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn) { dolea = 1; consumed_left_shift = -1; - DBG((mod, LEVEL_1, "\tgot LEA candidate with index %+F\n", index)); + DBG((dbg, LEVEL_1, "\tgot LEA candidate with index %+F\n", index)); /* determine the operand which needs to be checked */ temp = left; @@ -904,7 +924,7 @@ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn) { shift = temp; scale = shiftval; - DBG((mod, LEVEL_1, "\tgot scaled index %+F\n", index)); + DBG((dbg, LEVEL_1, "\tgot scaled index %+F\n", index)); } } } @@ -928,10 +948,10 @@ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn) { int take_attr = do_new_lea(irn, base, index, left, have_am_sc, cg); if (take_attr == IA32_LEA_ATTR_NONE) { - DBG((mod, LEVEL_1, "\tleave old LEA, creating new one\n")); + DBG((dbg, LEVEL_1, "\tleave old LEA, creating new one\n")); } else { - DBG((mod, LEVEL_1, "\tgot LEA as left operand ... assimilating\n")); + DBG((dbg, LEVEL_1, "\tgot LEA as left operand ... 
assimilating\n")); lea = left; /* for statistics */ if (take_attr & IA32_LEA_ATTR_OFFS) @@ -959,7 +979,7 @@ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn) { /* ok, we can create a new LEA */ if (dolea) { - res = new_rd_ia32_Lea(dbg, irg, block, base, index); + res = new_rd_ia32_Lea(dbg_info, irg, block, base, index); /* add the old offset of a previous LEA */ add_ia32_am_offs_int(res, offs); @@ -1019,7 +1039,16 @@ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn) { SET_IA32_ORIG_NODE(res, ia32_get_old_node_name(cg, irn)); - DBG((mod, LEVEL_1, "\tLEA [%+F + %+F * %d + %d]\n", base, index, scale, get_ia32_am_offs_int(res))); + DBG((dbg, LEVEL_1, "\tLEA [%+F + %+F * %d + %d]\n", base, index, scale, get_ia32_am_offs_int(res))); + + assert(irn && "Couldn't find result proj"); + + /* get the result Proj of the Add/Sub */ + try_add_to_sched(irn, res); + + /* exchange the old op with the new LEA */ + try_remove_from_sched(irn); + exchange(irn, res); /* we will exchange it, report here before the Proj is created */ if (shift && lea && lea_o) { @@ -1027,45 +1056,30 @@ static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn) { try_remove_from_sched(lea); try_remove_from_sched(lea_o); DBG_OPT_LEA4(irn, lea_o, lea, shift, res); - } - else if (shift && lea) { + } else if (shift && lea) { try_remove_from_sched(shift); try_remove_from_sched(lea); DBG_OPT_LEA3(irn, lea, shift, res); - } - else if (shift && lea_o) { + } else if (shift && lea_o) { try_remove_from_sched(shift); try_remove_from_sched(lea_o); DBG_OPT_LEA3(irn, lea_o, shift, res); - } - else if (lea && lea_o) { + } else if (lea && lea_o) { try_remove_from_sched(lea); try_remove_from_sched(lea_o); DBG_OPT_LEA3(irn, lea_o, lea, res); - } - else if (shift) { + } else if (shift) { try_remove_from_sched(shift); DBG_OPT_LEA2(irn, shift, res); - } - else if (lea) { + } else if (lea) { try_remove_from_sched(lea); DBG_OPT_LEA2(irn, lea, res); - } - else if (lea_o) { + } else if (lea_o) { try_remove_from_sched(lea_o); DBG_OPT_LEA2(irn, lea_o, res); - } - else + } else { DBG_OPT_LEA1(irn, res); - - /* get the result Proj of the Add/Sub */ - try_add_to_sched(irn, res); - try_remove_from_sched(irn); - - assert(irn && "Couldn't find result proj"); - - /* exchange the old op with the new LEA */ - exchange(irn, res); + } } return res; @@ -1138,9 +1152,7 @@ static void exchange_left_right(ir_node *irn, ir_node **left, ir_node **right, i /** * Performs address calculation optimization (create LEAs if possible) */ -static void optimize_lea(ir_node *irn, void *env) { - ia32_code_gen_t *cg = env; - +static void optimize_lea(ia32_code_gen_t *cg, ir_node *irn) { if (! is_ia32_irn(irn)) return; @@ -1158,13 +1170,13 @@ static void optimize_lea(ir_node *irn, void *env) { if(!is_addr_candidate(irn)) return; - DBG((cg->mod, LEVEL_1, "\tfound address calculation candidate %+F ... ", irn)); + DBG((dbg, LEVEL_1, "\tfound address calculation candidate %+F ... 
", irn)); res = fold_addr(cg, irn); if (res != irn) - DB((cg->mod, LEVEL_1, "transformed into %+F\n", res)); + DB((dbg, LEVEL_1, "transformed into %+F\n", res)); else - DB((cg->mod, LEVEL_1, "not transformed\n")); + DB((dbg, LEVEL_1, "not transformed\n")); } else if (is_ia32_Ld(irn) || is_ia32_St(irn) || is_ia32_Store8Bit(irn)) { /* - Load -> LEA into Load } TODO: If the LEA is used by more than one Load/Store */ /* - Store -> LEA into Store } it might be better to keep the LEA */ @@ -1179,7 +1191,7 @@ static void optimize_lea(ir_node *irn, void *env) { src = get_edge_src_irn(edge); if (src && (get_edge_src_pos(edge) == 0) && (is_ia32_Ld(src) || is_ia32_St(src) || is_ia32_Store8Bit(src))) { - DBG((cg->mod, LEVEL_1, "\nmerging %+F into %+F\n", left, irn)); + DBG((dbg, LEVEL_1, "\nmerging %+F into %+F\n", left, irn)); if (! is_ia32_got_lea(src)) merge_loadstore_lea(src, left); set_ia32_got_lea(src); @@ -1189,6 +1201,55 @@ static void optimize_lea(ir_node *irn, void *env) { } } +static void optimize_conv_store(ia32_code_gen_t *cg, ir_node *node) +{ + ir_node *pred; + + if(!is_ia32_Store(node) && !is_ia32_Store8Bit(node)) + return; + + pred = get_irn_n(node, 2); + if(!is_ia32_Conv_I2I(pred) && !is_ia32_Conv_I2I8Bit(pred)) + return; + + if(get_ia32_ls_mode(pred) != get_ia32_ls_mode(node)) + return; + + /* unnecessary conv, the store already does the conversion */ + set_irn_n(node, 2, get_irn_n(pred, 2)); + if(get_irn_n_edges(pred) == 0) { + be_kill_node(pred); + } +} + +static void optimize_load_conv(ia32_code_gen_t *cg, ir_node *node) +{ + ir_node *pred, *predpred; + + if (!is_ia32_Conv_I2I(node) && !is_ia32_Conv_I2I8Bit(node)) + return; + + pred = get_irn_n(node, 2); + if(!is_Proj(pred)) + return; + + predpred = get_Proj_pred(pred); + if(!is_ia32_Load(predpred)) + return; + + /* unnecessary conv, the load already did the conversion */ + exchange(node, pred); +} + +static void optimize_node(ir_node *node, void *env) +{ + ia32_code_gen_t *cg = env; + + optimize_load_conv(cg, node); + optimize_conv_store(cg, node); + optimize_lea(cg, node); +} + /** * Checks for address mode patterns and performs the * necessary transformations. @@ -1208,7 +1269,17 @@ static void optimize_am(ir_node *irn, void *env) { ia32_am_cand_t orig_cand; int dest_possible; int source_possible; - DEBUG_ONLY(firm_dbg_module_t *mod = cg->mod;) + + static const arch_register_req_t dest_out_reg_req_0 = { + arch_register_req_type_none, + NULL, /* regclass */ + NULL, /* limit bitset */ + -1, /* same pos */ + -1 /* different pos */ + }; + static const arch_register_req_t *dest_am_out_reqs[] = { + &dest_out_reg_req_0 + }; if (!is_ia32_irn(irn) || is_ia32_Ld(irn) || is_ia32_St(irn) || is_ia32_Store8Bit(irn)) return; @@ -1218,8 +1289,6 @@ static void optimize_am(ir_node *irn, void *env) { am_support = get_ia32_am_support(irn); block = get_nodes_block(irn); - DBG((mod, LEVEL_1, "checking for AM\n")); - /* fold following patterns: */ /* - op -> Load into AMop with am_Source */ /* conditions: */ @@ -1241,7 +1310,8 @@ static void optimize_am(ir_node *irn, void *env) { return; orig_cand = cand; - DBG((mod, LEVEL_1, "\tfound address mode candidate %+F ... ", irn)); + DBG((dbg, LEVEL_1, "\tfound address mode candidate %+F (candleft %d candright %d)... \n", irn, + cand & IA32_AM_CAND_LEFT, cand & IA32_AM_CAND_RIGHT)); left = get_irn_n(irn, 2); if (get_irn_arity(irn) == 4) { @@ -1255,6 +1325,8 @@ static void optimize_am(ir_node *irn, void *env) { dest_possible = am_support & ia32_am_Dest ? 
1 : 0; source_possible = am_support & ia32_am_Source ? 1 : 0; + DBG((dbg, LEVEL_2, "\tdest_possible %d source_possible %d ... \n", dest_possible, source_possible)); + if (dest_possible) { addr_b = NULL; addr_i = NULL; @@ -1272,6 +1344,7 @@ static void optimize_am(ir_node *irn, void *env) { } if (store == NULL) { + DBG((dbg, LEVEL_2, "\tno store found, not using dest_mode\n")); dest_possible = 0; } } @@ -1280,7 +1353,9 @@ static void optimize_am(ir_node *irn, void *env) { /* normalize nodes, we need the interesting load on the left side */ if (cand & IA32_AM_CAND_RIGHT) { load = get_Proj_pred(right); - if (load_store_addr_is_equal(load, store, addr_b, addr_i)) { + if (load_store_addr_is_equal(load, store, addr_b, addr_i) + && node_is_ia32_comm(irn)) { + DBG((dbg, LEVEL_2, "\texchanging left/right\n")); exchange_left_right(irn, &left, &right, 3, 2); need_exchange_on_fail ^= 1; if (cand == IA32_AM_CAND_RIGHT) @@ -1296,10 +1371,12 @@ static void optimize_am(ir_node *irn, void *env) { #ifndef AGGRESSIVE_AM /* we have to be the only user of the load */ if (get_irn_n_edges(left) > 1) { + DBG((dbg, LEVEL_2, "\tmatching load has too may users, not using dest_mode\n")); dest_possible = 0; } #endif } else { + DBG((dbg, LEVEL_2, "\tno matching load found, not using dest_mode")); dest_possible = 0; } } @@ -1311,16 +1388,27 @@ static void optimize_am(ir_node *irn, void *env) { ir_node *storemem = get_irn_n(store, 3); assert(get_irn_mode(loadmem) == mode_M); assert(get_irn_mode(storemem) == mode_M); - if(storemem != loadmem || !is_Proj(storemem) - || get_Proj_pred(storemem) != load) { + /* TODO there could be a sync between store and load... */ + if(storemem != loadmem && (!is_Proj(storemem) || get_Proj_pred(storemem) != load)) { + DBG((dbg, LEVEL_2, "\tload/store using different memories, not using dest_mode")); dest_possible = 0; } } if (dest_possible) { /* Compare Load and Store address */ - if (!load_store_addr_is_equal(load, store, addr_b, addr_i)) + if (!load_store_addr_is_equal(load, store, addr_b, addr_i)) { + DBG((dbg, LEVEL_2, "\taddresses not equal, not using dest_mode")); dest_possible = 0; + } + } + + if (dest_possible) { + assert(is_ia32_Load(load)); + ir_mode *lsmode = get_ia32_ls_mode(load); + if(get_mode_size_bits(lsmode) != 32) { + dest_possible = 0; + } } if (dest_possible) { @@ -1352,20 +1440,21 @@ static void optimize_am(ir_node *irn, void *env) { set_irn_n(irn, 2, ia32_get_admissible_noreg(cg, irn, 2)); } + /* change node mode and out register requirements */ set_irn_mode(irn, mode_M); + set_ia32_out_req_all(irn, dest_am_out_reqs); /* connect the memory Proj of the Store to the op */ - mem_proj = ia32_get_proj_for_mode(store, mode_M); - edges_reroute(mem_proj, irn, irg); + edges_reroute(store, irn, irg); /* clear remat flag */ set_ia32_flags(irn, get_ia32_flags(irn) & ~arch_irn_flags_rematerializable); - try_remove_from_sched(load); try_remove_from_sched(store); + try_remove_from_sched(load); DBG_OPT_AM_D(load, store, irn); - DB((mod, LEVEL_1, "merged with %+F and %+F into dest AM\n", load, store)); + DB((dbg, LEVEL_1, "merged with %+F and %+F into dest AM\n", load, store)); need_exchange_on_fail = 0; source_possible = 0; } @@ -1393,6 +1482,14 @@ static void optimize_am(ir_node *irn, void *env) { } } + if (source_possible) { + assert(is_ia32_Load(load)); + ir_mode *ls_mode = get_ia32_ls_mode(load); + if(get_mode_size_bits(ls_mode) != 32) + source_possible = 0; + + } + if (source_possible) { addr_b = get_irn_n(load, 0); addr_i = get_irn_n(load, 1); @@ -1462,7 +1559,7 @@ static void 
optimize_am(ir_node *irn, void *env) { } need_exchange_on_fail = 0; - DB((mod, LEVEL_1, "merged with %+F into source AM\n", load)); + DB((dbg, LEVEL_1, "merged with %+F into source AM\n", load)); } /* was exchanged but optimize failed: exchange back */ @@ -1472,15 +1569,11 @@ static void optimize_am(ir_node *irn, void *env) { } /** - * Performs address mode optimization. + * Performs conv and address mode optimization. */ -void ia32_optimize_addressmode(ia32_code_gen_t *cg) { +void ia32_optimize_graph(ia32_code_gen_t *cg) { /* if we are supposed to do AM or LEA optimization: recalculate edges */ - if (cg->opt & (IA32_OPT_DOAM | IA32_OPT_LEA)) { - edges_deactivate(cg->irg); - edges_activate(cg->irg); - } - else { + if (! (cg->opt & (IA32_OPT_DOAM | IA32_OPT_LEA))) { /* no optimizations at all */ return; } @@ -1490,12 +1583,19 @@ void ia32_optimize_addressmode(ia32_code_gen_t *cg) { /* invalidates the phase data */ if (cg->opt & IA32_OPT_LEA) { - irg_walk_blkwise_graph(cg->irg, NULL, optimize_lea, cg); + irg_walk_blkwise_graph(cg->irg, NULL, optimize_node, cg); } if (cg->dump) be_dump(cg->irg, "-lea", dump_ir_block_graph_sched); + /* hack for now, so these don't get created during optimize, because then + * they will be unknown to the heights module + */ + ia32_new_NoReg_gp(cg); + ia32_new_NoReg_fp(cg); + ia32_new_NoReg_vfp(cg); + if (cg->opt & IA32_OPT_DOAM) { /* we need height information for am optimization */ heights_t *h = heights_new(cg->irg); @@ -1509,3 +1609,8 @@ void ia32_optimize_addressmode(ia32_code_gen_t *cg) { heights_free(h); } } + +void ia32_init_optimize(void) +{ + FIRM_DBG_REGISTER(dbg, "firm.be.ia32.optimize"); +}
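
For illustration only (not part of the patch): the two transformations this change touches are easiest to see as source-level analogues. Below is a minimal C sketch with hypothetical function and variable names (add_in_place, store_byte, p, x, dst, value), showing the patterns that destination address-mode folding and the new optimize_conv_store() peephole target; the assembly mentioned in the comments is the expected, not guaranteed, instruction selection.

#include <stdint.h>

/* Destination-AM candidate: load, add and store back through the same
 * address.  With IA32_OPT_DOAM the backend can fold such a
 * Load/Add/Store triple into a single read-modify-write instruction
 * (something like "add [p], x"); the exact code depends on the
 * instruction selector and surrounding schedule. */
void add_in_place(int32_t *p, int32_t x)
{
	*p = *p + x;
}

/* Conv-before-Store pattern targeted by optimize_conv_store(): the
 * explicit truncation is redundant because the 8-bit store already
 * discards the upper bits, so a Conv feeding a Store with the same
 * ls_mode can be bypassed. */
void store_byte(uint8_t *dst, int32_t value)
{
	*dst = (uint8_t)value;	/* the same byte store is emitted without the cast */
}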