nsz Git - libfirm/blob - ir/be/ia32/ia32_optimize.c

   1 /*
   2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
   3  *
   4  * This file is part of libFirm.
   5  *
   6  * This file may be distributed and/or modified under the terms of the
   7  * GNU General Public License version 2 as published by the Free Software
   8  * Foundation and appearing in the file LICENSE.GPL included in the
   9  * packaging of this file.
  10  *
  11  * Licensees holding valid libFirm Professional Edition licenses may use
  12  * this file in accordance with the libFirm Commercial License.
  13  * Agreement provided with the Software.
  14  *
  15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
  16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17  * PURPOSE.
  18  */
  19
  20 /**
  21  * @file
  22  * @brief       Implements several optimizations for IA32.
  23  * @author      Matthias Braun, Christian Wuerdig
  24  * @version     $Id$
  25  */
  26 #ifdef HAVE_CONFIG_H
  27 #include "config.h"
  28 #endif
  29
  30 #include "irnode.h"
  31 #include "irprog_t.h"
  32 #include "ircons.h"
  33 #include "irtools.h"
  34 #include "firm_types.h"
  35 #include "iredges.h"
  36 #include "tv.h"
  37 #include "irgmod.h"
  38 #include "irgwalk.h"
  39 #include "height.h"
  40 #include "irbitset.h"
  41
  42 #include "../be_t.h"
  43 #include "../beabi.h"
  44 #include "../benode_t.h"
  45 #include "../besched_t.h"
  46
  47 #include "ia32_new_nodes.h"
  48 #include "bearch_ia32_t.h"
  49 #include "gen_ia32_regalloc_if.h"
  50 #include "ia32_transform.h"
  51 #include "ia32_dbg_stat.h"
  52 #include "ia32_util.h"
  53
  54 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
  55
  56 typedef int is_op_func_t(const ir_node *n);
  57 typedef ir_node *load_func_t(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, ir_node *mem);
  58
  59 /**
  60  * checks if a node represents the NOREG value
  61  */
  62 static INLINE int be_is_NoReg(ia32_code_gen_t *cg, const ir_node *irn) {
  63         return irn == cg->noreg_gp || irn == cg->noreg_xmm || irn == cg->noreg_vfp;
  64 }
  65
  66 /********************************************************************************************************
  67  *  _____                _           _         ____        _   _           _          _   _
  68  * |  __ \              | |         | |       / __ \      | | (_)         (_)        | | (_)
  69  * | |__) |__  ___ _ __ | |__   ___ | | ___  | |  | |_ __ | |_ _ _ __ ___  _ ______ _| |_ _  ___  _ __
  70  * |  ___/ _ \/ _ \ '_ \| '_ \ / _ \| |/ _ \ | |  | | '_ \| __| | '_ ` _ \| |_  / _` | __| |/ _ \| '_ \
  71  * | |  |  __/  __/ |_) | | | | (_) | |  __/ | |__| | |_) | |_| | | | | | | |/ / (_| | |_| | (_) | | | |
  72  * |_|   \___|\___| .__/|_| |_|\___/|_|\___|  \____/| .__/ \__|_|_| |_| |_|_/___\__,_|\__|_|\___/|_| |_|
  73  *                | |                               | |
  74  *                |_|                               |_|
  75  ********************************************************************************************************/
  76
  77 /**
  78  * NOTE: THESE PEEPHOLE OPTIMIZATIONS MUST BE CALLED AFTER SCHEDULING AND REGISTER ALLOCATION.
  79  */
  80
  81 // only optimize up to 48 stores behind IncSPs
  82 #define MAXPUSH_OPTIMIZE        48
  83
  84 /**
  85  * Tries to create pushs from IncSP,Store combinations
  86  */
  87 static void ia32_create_Pushs(ir_node *irn, ia32_code_gen_t *cg) {
  88         int i;
  89         int offset;
  90         ir_node *node;
  91         ir_node *stores[MAXPUSH_OPTIMIZE];
  92         ir_node *block = get_nodes_block(irn);
  93         ir_graph *irg = cg->irg;
  94         ir_node *curr_sp;
  95         ir_mode *spmode = get_irn_mode(irn);
  96
  97         memset(stores, 0, sizeof(stores));
  98
  99         assert(be_is_IncSP(irn));
 100
 101         offset = be_get_IncSP_offset(irn);
 102         if(offset < 4)
 103                 return;
 104
 105         /*
 106          * We first walk the schedule after the IncSP node as long as we find
 107          * suitable stores that could be transformed to a push.
 108          * We save them into the stores array which is sorted by the frame offset/4
 109          * attached to the node
 110          */
 111         for(node = sched_next(irn); !sched_is_end(node); node = sched_next(node)) {
 112                 ir_node *mem;
 113                 int offset;
 114                 int storeslot;
 115
 116                 // it has to be a store
 117                 if(!is_ia32_Store(node))
 118                         break;
 119
 120                 // it has to use our sp value
 121                 if(get_irn_n(node, 0) != irn)
 122                         continue;
 123                 // store has to be attached to NoMem
 124                 mem = get_irn_n(node, 3);
 125                 if(!is_NoMem(mem)) {
 126                         continue;
 127                 }
 128
 129                 /* unfortunately we can't support the full AMs possible for push at the
 130                  * moment. TODO: fix this */
 131                 if(get_ia32_am_scale(node) > 0 || !is_ia32_NoReg_GP(get_irn_n(node, 1)))
 132                         break;
 133
 134                 offset = get_ia32_am_offs_int(node);
 135
 136                 storeslot = offset / 4;
 137                 if(storeslot >= MAXPUSH_OPTIMIZE)
 138                         continue;
 139
 140                 // storing into the same slot twice is bad (and shouldn't happen...)
 141                 if(stores[storeslot] != NULL)
 142                         break;
 143
 144                 // storing at half-slots is bad
 145                 if(offset % 4 != 0)
 146                         break;
 147
 148                 stores[storeslot] = node;
 149         }
 150
 151         curr_sp = get_irn_n(irn, 0);
 152
 153         // walk the stores in inverse order and create pushs for them
 154         i = (offset / 4) - 1;
 155         if(i >= MAXPUSH_OPTIMIZE) {
 156                 i = MAXPUSH_OPTIMIZE - 1;
 157         }
 158
 159         for( ; i >= 0; --i) {
 160                 const arch_register_t *spreg;
 161                 ir_node *push;
 162                 ir_node *val, *mem, *mem_proj;
 163                 ir_node *store = stores[i];
 164                 ir_node *noreg = ia32_new_NoReg_gp(cg);
 165
 166                 if(store == NULL || is_Bad(store))
 167                         break;
 168
 169                 val = get_irn_n(store, 2);
 170                 mem = get_irn_n(store, 3);
 171                 spreg = arch_get_irn_register(cg->arch_env, curr_sp);
 172
 173                 push = new_rd_ia32_Push(get_irn_dbg_info(store), irg, block, noreg, noreg, val, curr_sp, mem);
 174
 175                 set_ia32_am_support(push, ia32_am_Source, ia32_am_unary);
 176
 177                 sched_add_before(irn, push);
 178
 179                 // create stackpointer proj
 180                 curr_sp = new_r_Proj(irg, block, push, spmode, pn_ia32_Push_stack);
 181                 arch_set_irn_register(cg->arch_env, curr_sp, spreg);
 182
 183                 // create memory proj
 184                 mem_proj = new_r_Proj(irg, block, push, mode_M, pn_ia32_Push_M);
 185
 186                 // use the memproj now
 187                 exchange(store, mem_proj);
 188
 189                 // we can remove the store now
 190                 sched_remove(store);
 191
 192                 offset -= 4;
 193         }
 194
 195         be_set_IncSP_offset(irn, offset);
 196
 197         // can we remove the IncSP now?
 198         if(offset == 0) {
 199                 const ir_edge_t *edge, *next;
 200
 201                 foreach_out_edge_safe(irn, edge, next) {
 202                         ir_node *arg = get_edge_src_irn(edge);
 203                         int pos = get_edge_src_pos(edge);
 204
 205                         set_irn_n(arg, pos, curr_sp);
 206                 }
 207
 208                 set_irn_n(irn, 0, new_Bad());
 209                 sched_remove(irn);
 210         } else {
 211                 set_irn_n(irn, 0, curr_sp);
 212         }
 213 }
 214
 215 /**
 216  * Tries to optimize two following IncSP.
 217  */
 218 static void ia32_optimize_IncSP(ir_node *node)
 219 {
 220         int      pred_offs;
 221         int      curr_offs;
 222         int      offs;
 223         ir_node *pred = be_get_IncSP_pred(node);
 224         ir_node *predpred;
 225
 226         if(!be_is_IncSP(pred))
 227                 return;
 228
 229         if(get_irn_n_edges(pred) > 1)
 230                 return;
 231
 232         pred_offs = be_get_IncSP_offset(pred);
 233         curr_offs = be_get_IncSP_offset(node);
 234
 235         if(pred_offs == BE_STACK_FRAME_SIZE_EXPAND) {
 236                 if(curr_offs != BE_STACK_FRAME_SIZE_SHRINK) {
 237                         return;
 238                 }
 239                 offs = 0;
 240         } else if(pred_offs == BE_STACK_FRAME_SIZE_SHRINK) {
 241                 if(curr_offs != BE_STACK_FRAME_SIZE_EXPAND) {
 242                         return;
 243                 }
 244                 offs = 0;
 245         } else if(curr_offs == BE_STACK_FRAME_SIZE_EXPAND
 246                         || curr_offs == BE_STACK_FRAME_SIZE_SHRINK) {
 247                 return;
 248         } else {
 249                 offs = curr_offs + pred_offs;
 250         }
 251
 252         be_set_IncSP_offset(node, offs);
 253
 254         /* rewire dependency edges */
 255         predpred = be_get_IncSP_pred(pred);
 256         edges_reroute_kind(pred, predpred, EDGE_KIND_DEP, current_ir_graph);
 257
 258         /* Omit the IncSP */
 259         be_set_IncSP_pred(node, predpred);
 260         sched_remove(pred);
 261         be_kill_node(pred);
 262 }
 263
 264 /**
 265  * Performs Peephole Optimizations.
 266  */
 267 static void ia32_peephole_optimize_node(ir_node *node, void *env) {
 268         ia32_code_gen_t *cg = env;
 269
 270         if (be_is_IncSP(node)) {
 271                 ia32_optimize_IncSP(node);
 272
 273                 if (cg->opt & IA32_OPT_PUSHARGS)
 274                         ia32_create_Pushs(node, cg);
 275         }
 276 }
 277
 278 void ia32_peephole_optimization(ir_graph *irg, ia32_code_gen_t *cg) {
 279         irg_walk_graph(irg, ia32_peephole_optimize_node, NULL, cg);
 280 }
 281
 282 /******************************************************************
 283  *              _     _                   __  __           _
 284  *     /\      | |   | |                 |  \/  |         | |
 285  *    /  \   __| | __| |_ __ ___  ___ ___| \  / | ___   __| | ___
 286  *   / /\ \ / _` |/ _` | '__/ _ \/ __/ __| |\/| |/ _ \ / _` |/ _ \
 287  *  / ____ \ (_| | (_| | | |  __/\__ \__ \ |  | | (_) | (_| |  __/
 288  * /_/    \_\__,_|\__,_|_|  \___||___/___/_|  |_|\___/ \__,_|\___|
 289  *
 290  ******************************************************************/
 291
 292 /**
 293  * Removes node from schedule if it is not used anymore. If irn is a mode_T node
 294  * all it's Projs are removed as well.
 295  * @param irn  The irn to be removed from schedule
 296  */
 297 static INLINE void try_kill(ir_node *node)
 298 {
 299         if(get_irn_mode(node) == mode_T) {
 300                 const ir_edge_t *edge, *next;
 301                 foreach_out_edge_safe(node, edge, next) {
 302                         ir_node *proj = get_edge_src_irn(edge);
 303                         try_kill(proj);
 304                 }
 305         }
 306
 307         if(get_irn_n_edges(node) != 0)
 308                 return;
 309
 310         if (sched_is_scheduled(node)) {
 311                 sched_remove(node);
 312         }
 313
 314         be_kill_node(node);
 315 }
 316
 317 static void optimize_conv_store(ir_node *node)
 318 {
 319         ir_node *pred;
 320         ir_mode *conv_mode;
 321         ir_mode *store_mode;
 322
 323         if(!is_ia32_Store(node) && !is_ia32_Store8Bit(node))
 324                 return;
 325
 326         pred = get_irn_n(node, 2);
 327         if(!is_ia32_Conv_I2I(pred) && !is_ia32_Conv_I2I8Bit(pred))
 328                 return;
 329
 330         /* the store only stores the lower bits, so we only need the conv
 331          * it it shrinks the mode */
 332         conv_mode  = get_ia32_ls_mode(pred);
 333         store_mode = get_ia32_ls_mode(node);
 334         if(get_mode_size_bits(conv_mode) < get_mode_size_bits(store_mode))
 335                 return;
 336
 337         set_irn_n(node, 2, get_irn_n(pred, 2));
 338         if(get_irn_n_edges(pred) == 0) {
 339                 be_kill_node(pred);
 340         }
 341 }
 342
 343 static void optimize_load_conv(ir_node *node)
 344 {
 345         ir_node *pred, *predpred;
 346         ir_mode *load_mode;
 347         ir_mode *conv_mode;
 348
 349         if (!is_ia32_Conv_I2I(node) && !is_ia32_Conv_I2I8Bit(node))
 350                 return;
 351
 352         pred = get_irn_n(node, 2);
 353         if(!is_Proj(pred))
 354                 return;
 355
 356         predpred = get_Proj_pred(pred);
 357         if(!is_ia32_Load(predpred))
 358                 return;
 359
 360         /* the load is sign extending the upper bits, so we only need the conv
 361          * if it shrinks the mode */
 362         load_mode = get_ia32_ls_mode(predpred);
 363         conv_mode = get_ia32_ls_mode(node);
 364         if(get_mode_size_bits(conv_mode) < get_mode_size_bits(load_mode))
 365                 return;
 366
 367         if(get_mode_sign(conv_mode) != get_mode_sign(load_mode)) {
 368                 /* change the load if it has only 1 user */
 369                 if(get_irn_n_edges(pred) == 1) {
 370                         ir_mode *newmode;
 371                         if(get_mode_sign(conv_mode)) {
 372                                 newmode = find_signed_mode(load_mode);
 373                         } else {
 374                                 newmode = find_unsigned_mode(load_mode);
 375                         }
 376                         assert(newmode != NULL);
 377                         set_ia32_ls_mode(predpred, newmode);
 378                 } else {
 379                         /* otherwise we have to keep the conv */
 380                         return;
 381                 }
 382         }
 383
 384         /* kill the conv */
 385         exchange(node, pred);
 386 }
 387
 388 static void optimize_conv_conv(ir_node *node)
 389 {
 390         ir_node *pred_proj, *pred, *result_conv;
 391         ir_mode *pred_mode, *conv_mode;
 392
 393         if (!is_ia32_Conv_I2I(node) && !is_ia32_Conv_I2I8Bit(node))
 394                 return;
 395
 396         assert(n_ia32_Conv_I2I_val == n_ia32_Conv_I2I8Bit_val);
 397         pred_proj = get_irn_n(node, n_ia32_Conv_I2I_val);
 398         if(is_Proj(pred_proj))
 399                 pred = get_Proj_pred(pred_proj);
 400         else
 401                 pred = pred_proj;
 402
 403         if(!is_ia32_Conv_I2I(pred) && !is_ia32_Conv_I2I8Bit(pred))
 404                 return;
 405
 406         /* we know that after a conv, the upper bits are sign extended
 407          * so we only need the 2nd conv if it shrinks the mode */
 408         conv_mode = get_ia32_ls_mode(node);
 409         pred_mode = get_ia32_ls_mode(pred);
 410         /* if 2nd conv is smaller then first conv, then we can always take the 2nd
 411          * conv */
 412         if(get_mode_size_bits(conv_mode) <= get_mode_size_bits(pred_mode)) {
 413                 if(get_irn_n_edges(pred_proj) == 1) {
 414                         result_conv = pred_proj;
 415                         set_ia32_ls_mode(pred, conv_mode);
 416
 417                         /* Argh:We must change the opcode to 8bit AND copy the register constraints */
 418                         if (get_mode_size_bits(conv_mode) == 8) {
 419                                 set_irn_op(pred, op_ia32_Conv_I2I8Bit);
 420                                 set_ia32_in_req_all(pred, get_ia32_in_req_all(node));
 421                         }
 422                 } else {
 423                         /* TODO: construct syncs/stuff here but we'll probably end up with
 424                          * 2 statements anyway */
 425                         if(get_irn_mode(pred) == mode_T) {
 426                                 return;
 427                         }
 428
 429                         result_conv = exact_copy(pred);
 430                         set_ia32_ls_mode(result_conv, conv_mode);
 431
 432                         /* Argh:We must change the opcode to 8bit AND copy the register constraints */
 433                         if (get_mode_size_bits(conv_mode) == 8) {
 434                                 set_irn_op(result_conv, op_ia32_Conv_I2I8Bit);
 435                                 set_ia32_in_req_all(result_conv, get_ia32_in_req_all(node));
 436                         }
 437                 }
 438         } else {
 439                 /* if both convs have the same sign, then we can take the smaller one */
 440                 if(get_mode_sign(conv_mode) == get_mode_sign(pred_mode)) {
 441                         result_conv = pred_proj;
 442                 } else {
 443                         /* no optimisation possible if smaller conv is sign-extend */
 444                         if(mode_is_signed(pred_mode)) {
 445                                 return;
 446                         }
 447                         /* we can take the smaller conv if it is unsigned */
 448                         result_conv = pred_proj;
 449                 }
 450         }
 451
 452         /* kill the conv */
 453         exchange(node, result_conv);
 454
 455         if(get_irn_n_edges(pred) == 0) {
 456                 be_kill_node(pred);
 457         }
 458         optimize_conv_conv(result_conv);
 459 }
 460
 461 static void optimize_node(ir_node *node, void *env)
 462 {
 463         (void) env;
 464
 465         optimize_load_conv(node);
 466         optimize_conv_store(node);
 467         optimize_conv_conv(node);
 468 }
 469
 470 /**
 471  * Performs conv and address mode optimization.
 472  */
 473 void ia32_optimize_graph(ia32_code_gen_t *cg)
 474 {
 475         irg_walk_blkwise_graph(cg->irg, NULL, optimize_node, cg);
 476
 477         if (cg->dump)
 478                 be_dump(cg->irg, "-opt", dump_ir_block_graph_sched);
 479 }
 480
 481 void ia32_init_optimize(void)
 482 {
 483         FIRM_DBG_REGISTER(dbg, "firm.be.ia32.optimize");
 484 }