/*
 * Copyright (C) 1995-2007 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief   Implements several optimizations for IA32.
 * @author  Christian Wuerdig
 */
#include "firm_types.h"

#include "../benode_t.h"
#include "../besched_t.h"

#include "ia32_new_nodes.h"
#include "bearch_ia32_t.h"
#include "gen_ia32_regalloc_if.h"
#include "ia32_transform.h"
#include "ia32_dbg_stat.h"
#include "ia32_util.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

//#define AGGRESSIVE_AM
typedef enum {
	IA32_AM_CAND_NONE  = 0,  /**< no address mode possible with irn inputs */
	IA32_AM_CAND_LEFT  = 1,  /**< address mode possible with left input */
	IA32_AM_CAND_RIGHT = 2,  /**< address mode possible with right input */
	IA32_AM_CAND_BOTH  = 3   /**< address mode possible with both inputs */
} ia32_am_cand_t;

typedef int is_op_func_t(const ir_node *n);
typedef ir_node *load_func_t(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, ir_node *mem);

/**
 * Checks if a node represents the NOREG value.
 */
static INLINE int be_is_NoReg(ia32_code_gen_t *cg, const ir_node *irn) {
	return irn == cg->noreg_gp || irn == cg->noreg_xmm || irn == cg->noreg_vfp;
}
/********************************************************************************************************
 *
 *                                  Peephole Optimizations
 *
 ********************************************************************************************************/

/**
 * NOTE: THESE PEEPHOLE OPTIMIZATIONS MUST BE CALLED AFTER SCHEDULING AND
 *       REGISTER ALLOCATION.
 */

// only optimize up to 48 Stores behind IncSPs
#define MAXPUSH_OPTIMIZE	48
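/*
 * Illustrative sketch (not part of the original code): on ia32, a fresh
 * stack frame written through the stack pointer, e.g.
 *
 *     sub esp, 8          ; IncSP +8
 *     mov [esp+4], eax    ; Store at slot 1
 *     mov [esp],   ebx    ; Store at slot 0
 *
 * can be expressed as
 *
 *     push eax
 *     push ebx
 *
 * ia32_create_Pushs() below performs this rewrite on the Firm graph: it
 * collects the Stores behind an IncSP into slots (offset/4), then emits
 * Push nodes in reverse slot order while shrinking the IncSP offset.
 */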
/**
 * Tries to create Push nodes from IncSP/Store combinations.
 */
static void ia32_create_Pushs(ir_node *irn, ia32_code_gen_t *cg) {
	int       i, offset;
	ir_node  *node, *curr_sp;
	ir_node  *stores[MAXPUSH_OPTIMIZE];
	ir_node  *block  = get_nodes_block(irn);
	ir_graph *irg    = cg->irg;
	ir_mode  *spmode = get_irn_mode(irn);

	memset(stores, 0, sizeof(stores));

	assert(be_is_IncSP(irn));

	offset = be_get_IncSP_offset(irn);

	/*
	 * We first walk the schedule after the IncSP node as long as we find
	 * suitable Stores that could be transformed into a Push.
	 * We save them into the stores array, which is sorted by the frame
	 * offset/4 attached to the node.
	 */
	for(node = sched_next(irn); !sched_is_end(node); node = sched_next(node)) {
		ir_node *mem;
		int      offset, storeslot;

		// it has to be a Store
		if(!is_ia32_Store(node))
			break;

		// it has to use our sp value
		if(get_irn_n(node, 0) != irn)
			continue;
		// the Store has to be attached to NoMem
		mem = get_irn_n(node, 3);
		if(!is_NoMem(mem))
			continue;

		if( (get_ia32_am_flavour(node) & ia32_am_IS) != 0)
			break;

		offset    = get_ia32_am_offs_int(node);
		storeslot = offset / 4;
		if(storeslot >= MAXPUSH_OPTIMIZE)
			continue;

		// storing into the same slot twice is bad (and shouldn't happen...)
		if(stores[storeslot] != NULL)
			break;

		// storing at half-slots is bad
		if(offset % 4 != 0)
			break;

		stores[storeslot] = node;
	}
	curr_sp = get_irn_n(irn, 0);

	// walk the Stores in reverse order and create Pushs for them
	i = (offset / 4) - 1;
	if(i >= MAXPUSH_OPTIMIZE) {
		i = MAXPUSH_OPTIMIZE - 1;
	}

	for( ; i >= 0; --i) {
		const arch_register_t *spreg;
		ir_node *push;
		ir_node *val, *mem, *mem_proj;
		ir_node *store = stores[i];
		ir_node *noreg = ia32_new_NoReg_gp(cg);

		if(store == NULL || is_Bad(store))
			break;

		val   = get_irn_n(store, 2);
		mem   = get_irn_n(store, 3);
		spreg = arch_get_irn_register(cg->arch_env, curr_sp);

		push = new_rd_ia32_Push(get_irn_dbg_info(store), irg, block, noreg, noreg, val, curr_sp, mem);

		set_ia32_am_support(push, ia32_am_Source, ia32_am_unary);
		copy_ia32_Immop_attr(push, store);

		sched_add_before(irn, push);

		// create stack pointer Proj
		curr_sp = new_r_Proj(irg, block, push, spmode, pn_ia32_Push_stack);
		arch_set_irn_register(cg->arch_env, curr_sp, spreg);

		// create memory Proj
		mem_proj = new_r_Proj(irg, block, push, mode_M, pn_ia32_Push_M);

		// use the memproj now
		exchange(store, mem_proj);

		// we can remove the Store now
		sched_remove(store);

		offset -= 4;
	}

	be_set_IncSP_offset(irn, offset);

	// can we remove the IncSP now?
	if(offset == 0) {
		const ir_edge_t *edge, *next;

		foreach_out_edge_safe(irn, edge, next) {
			ir_node *arg = get_edge_src_irn(edge);
			int      pos = get_edge_src_pos(edge);
			set_irn_n(arg, pos, curr_sp);
		}

		set_irn_n(irn, 0, new_Bad());
		sched_remove(irn);
	} else {
		set_irn_n(irn, 0, curr_sp);
	}
}
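/*
 * Illustrative sketch (not part of the original code): ia32_optimize_IncSP()
 * below folds two adjacent stack pointer adjustments into one, e.g.
 *
 *     sub esp, 8     ; IncSP +8
 *     sub esp, 4     ; IncSP +4
 * becomes
 *     sub esp, 12    ; IncSP +12
 *
 * provided the first IncSP has no other users. The symbolic offsets
 * BE_STACK_FRAME_SIZE_EXPAND/SHRINK only cancel against their matching
 * counterpart.
 */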
/**
 * Tries to fold two consecutive IncSP nodes into one.
 */
static void ia32_optimize_IncSP(ir_node *node)
{
	int      pred_offs, curr_offs, offs;
	ir_node *predpred;
	ir_node *pred = be_get_IncSP_pred(node);

	if(!be_is_IncSP(pred))
		return;
	if(get_irn_n_edges(pred) > 1)
		return;

	pred_offs = be_get_IncSP_offset(pred);
	curr_offs = be_get_IncSP_offset(node);

	if(pred_offs == BE_STACK_FRAME_SIZE_EXPAND) {
		if(curr_offs != BE_STACK_FRAME_SIZE_SHRINK)
			return;
		offs = 0;
	} else if(pred_offs == BE_STACK_FRAME_SIZE_SHRINK) {
		if(curr_offs != BE_STACK_FRAME_SIZE_EXPAND)
			return;
		offs = 0;
	} else if(curr_offs == BE_STACK_FRAME_SIZE_EXPAND
			|| curr_offs == BE_STACK_FRAME_SIZE_SHRINK) {
		return;
	} else {
		offs = curr_offs + pred_offs;
	}
	be_set_IncSP_offset(node, offs);

	/* rewire dependency edges */
	predpred = be_get_IncSP_pred(pred);
	edges_reroute_kind(pred, predpred, EDGE_KIND_DEP, current_ir_graph);
	be_set_IncSP_pred(node, predpred);
	sched_remove(pred);
}
/**
 * Performs peephole optimizations. Called by a walker.
 */
static void ia32_peephole_optimize_node(ir_node *node, void *env) {
	ia32_code_gen_t *cg = env;

	if (be_is_IncSP(node)) {
		ia32_optimize_IncSP(node);

		if (cg->opt & IA32_OPT_PUSHARGS)
			ia32_create_Pushs(node, cg);
	}
}

void ia32_peephole_optimization(ir_graph *irg, ia32_code_gen_t *cg) {
	irg_walk_graph(irg, ia32_peephole_optimize_node, NULL, cg);
}
/******************************************************************
 *
 *                        Address Mode
 *
 ******************************************************************/
static int node_is_ia32_comm(const ir_node *irn) {
	return is_ia32_irn(irn) ? is_ia32_commutative(irn) : 0;
}

static int ia32_get_irn_n_edges(const ir_node *irn) {
	const ir_edge_t *edge;
	int cnt = 0;

	foreach_out_edge(irn, edge) {
		cnt++;
	}
	return cnt;
}

/**
 * Checks if is_op_func returns true for the given node.
 *
 * @param pred        The node to be checked
 * @param is_op_func  The check function
 * @return 1 if the condition is fulfilled, 0 otherwise
 */
static int pred_is_specific_node(const ir_node *pred, is_op_func_t *is_op_func) {
	return is_op_func(pred);
}

/**
 * Determines if pred is a Proj and if is_op_func returns true for its
 * predecessor and if the predecessor is in block bl.
 *
 * @param bl          The block
 * @param pred        The node to be checked
 * @param is_op_func  The check function
 * @return 1 if all conditions are fulfilled, 0 otherwise
 */
static int pred_is_specific_nodeblock(const ir_node *bl, const ir_node *pred,
                                      int (*is_op_func)(const ir_node *n))
{
	if (is_Proj(pred)) {
		pred = get_Proj_pred(pred);
		if ((bl == get_nodes_block(pred)) && is_op_func(pred)) {
			return 1;
		}
	}
	return 0;
}
/**
 * Checks if irn is a candidate for address calculation (i.e. for being
 * turned into a LEA). We avoid transforming Adds into LEAs if they have a
 * Load as predecessor, because then we can use address mode instead.
 *
 * An irn is a candidate if
 * - none of its operands is a Load within the same block, OR
 * - every such Load has more than one user.
 *
 * @param irn  The irn to check
 * @return 1 if irn is a candidate, 0 otherwise
 */
static int is_addr_candidate(const ir_node *irn)
{
#ifndef AGGRESSIVE_AM
	const ir_node *block = get_nodes_block(irn);
	ir_node *left, *right;
	int      n;

	left  = get_irn_n(irn, 2);
	right = get_irn_n(irn, 3);

	if (pred_is_specific_nodeblock(block, left, is_ia32_Ld)) {
		n = ia32_get_irn_n_edges(left);
		/* load with only one user: don't create LEA */
		if (n == 1)
			return 0;
	}

	if (pred_is_specific_nodeblock(block, right, is_ia32_Ld)) {
		n = ia32_get_irn_n_edges(right);
		if (n == 1)
			return 0;
	}
#endif

	return 1;
}
/**
 * Checks if irn is a candidate for address mode.
 *
 * An irn is a candidate if
 * - at least one operand is a Load within the same block AND
 * - this Load has no users other than the irn AND
 * - the irn does not have a frame entity set.
 *
 * @param h      The height information of the irg
 * @param block  The block the Loads must be in
 * @param irn    The irn to check
 * @return The classification: none, left, right or both operands usable
 */
static ia32_am_cand_t is_am_candidate(heights_t *h, const ir_node *block, ir_node *irn) {
	ir_node *in, *load, *other, *left, *right;
	int      is_cand = 0, cand;
	int      arity;
	int      is_binary;

	if (is_ia32_Ld(irn) || is_ia32_St(irn) ||
	    is_ia32_vfild(irn) || is_ia32_vfist(irn) ||
	    is_ia32_xStoreSimple(irn))
		return IA32_AM_CAND_NONE;

	/* if the irn has a frame entity set: we do not use address mode */
	if(get_ia32_frame_ent(irn) != NULL)
		return IA32_AM_CAND_NONE;

	left      = get_irn_n(irn, 2);
	arity     = get_irn_arity(irn);
	is_binary = get_ia32_am_arity(irn) == ia32_am_binary;
	if(is_binary) {
		right = get_irn_n(irn, 3);
	} else {
		assert(get_ia32_am_arity(irn) == ia32_am_unary);
		right = left;
	}

	in = left;

	if (pred_is_specific_nodeblock(block, in, is_ia32_Ld)) {
#ifndef AGGRESSIVE_AM
		int n = ia32_get_irn_n_edges(in);
		is_cand = (n == 1) ? 1 : is_cand;	/* load with more than one user: no AM */
#else
		is_cand = 1;
#endif

		load  = get_Proj_pred(in);
		other = right;

		/* 8bit Loads are not supported (for binary ops),
		 * they cannot be used with every register */
		if (get_ia32_am_arity(irn) == ia32_am_binary &&
		    get_mode_size_bits(get_ia32_ls_mode(load)) < 16) {
			is_cand = 0;
		}

		/* If there is a data dependency of other irn from load: cannot use AM */
		if (is_cand && is_binary && get_nodes_block(other) == block) {
			other   = skip_Proj(other);
			is_cand = heights_reachable_in_block(h, other, load) ? 0 : is_cand;
			/* this could happen in loops */
			is_cand = heights_reachable_in_block(h, load, irn) ? 0 : is_cand;
		}
	}

	cand    = is_cand ? IA32_AM_CAND_LEFT : IA32_AM_CAND_NONE;
	in      = right;
	is_cand = 0;

	if (pred_is_specific_nodeblock(block, in, is_ia32_Ld)) {
#ifndef AGGRESSIVE_AM
		int n = ia32_get_irn_n_edges(in);
		is_cand = (n == 1) ? 1 : is_cand;	/* load with more than one user: no AM */
#else
		is_cand = 1;
#endif

		load  = get_Proj_pred(in);
		other = left;

		/* 8bit Loads are not supported (for binary ops),
		 * they cannot be used with every register */
		if (get_ia32_am_arity(irn) == ia32_am_binary &&
		    get_mode_size_bits(get_ia32_ls_mode(load)) < 16) {
			is_cand = 0;
		}

		/* If there is a data dependency of other irn from load: cannot use AM */
		if (is_cand && is_binary && get_nodes_block(other) == block) {
			other   = skip_Proj(other);
			is_cand = heights_reachable_in_block(h, other, load) ? 0 : is_cand;
			/* this could happen in loops */
			is_cand = heights_reachable_in_block(h, load, irn) ? 0 : is_cand;
		}
	}

	cand = is_cand ? (cand | IA32_AM_CAND_RIGHT) : cand;

	return cand;
}
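/*
 * Illustrative sketch (not part of the original code): a successful source
 * address mode fold turns
 *
 *     mov eax, [ebx]    ; Load
 *     add ecx, eax      ; op
 * into
 *     add ecx, [ebx]    ; op with source AM
 *
 * is_am_candidate() above only reports a Load as usable if it feeds nothing
 * but the op and no other operand of the op depends on it.
 */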
/**
 * Compares the base and index addresses and the load/store attributes
 * (frame entity, symconst, offset and mode) and returns 1 if they are equal.
 */
static int load_store_addr_is_equal(const ir_node *load, const ir_node *store,
                                    const ir_node *addr_b, const ir_node *addr_i)
{
	if(get_irn_n(load, 0) != addr_b)
		return 0;
	if(get_irn_n(load, 1) != addr_i)
		return 0;
	if(get_ia32_frame_ent(load) != get_ia32_frame_ent(store))
		return 0;
	if(get_ia32_am_sc(load) != get_ia32_am_sc(store))
		return 0;
	if(is_ia32_am_sc_sign(load) != is_ia32_am_sc_sign(store))
		return 0;
	if(get_ia32_am_offs_int(load) != get_ia32_am_offs_int(store))
		return 0;
	if(get_ia32_ls_mode(load) != get_ia32_ls_mode(store))
		return 0;
	return 1;
}
typedef enum _ia32_take_lea_attr {
	IA32_LEA_ATTR_NONE  = 0,
	IA32_LEA_ATTR_BASE  = (1 << 0),
	IA32_LEA_ATTR_INDEX = (1 << 1),
	IA32_LEA_ATTR_OFFS  = (1 << 2),
	IA32_LEA_ATTR_SCALE = (1 << 3),
	IA32_LEA_ATTR_AMSC  = (1 << 4),
	IA32_LEA_ATTR_FENT  = (1 << 5)
} ia32_take_lea_attr;
/**
 * Decides if we have to keep the LEA operand or if we can assimilate it.
 */
static int do_new_lea(ir_node *irn, ir_node *base, ir_node *index, ir_node *lea,
                      int have_am_sc, ia32_code_gen_t *cg)
{
	ir_entity *irn_ent = get_ia32_frame_ent(irn);
	ir_entity *lea_ent = get_ia32_frame_ent(lea);
	int        ret_val = 0;
	int        is_noreg_base  = be_is_NoReg(cg, base);
	int        is_noreg_index = be_is_NoReg(cg, index);
	ia32_am_flavour_t am_flav = get_ia32_am_flavour(lea);

	/* If the Add and the LEA both have a (different) frame entity set: keep */
	if (irn_ent && lea_ent && (irn_ent != lea_ent))
		return IA32_LEA_ATTR_NONE;
	else if (! irn_ent && lea_ent)
		ret_val |= IA32_LEA_ATTR_FENT;

	/* If the Add and the LEA both already have an address mode symconst: keep */
	if (have_am_sc && get_ia32_am_sc(lea))
		return IA32_LEA_ATTR_NONE;
	else if (get_ia32_am_sc(lea))
		ret_val |= IA32_LEA_ATTR_AMSC;

	/* Check the different base-index combinations */

	if (! is_noreg_base && ! is_noreg_index) {
		/* Assimilate if base is the LEA and the LEA is just a base + offset calculation */
		if ((base == lea) && ! (am_flav & ia32_I ? 1 : 0)) {
			if (am_flav & ia32_O)
				ret_val |= IA32_LEA_ATTR_OFFS;

			ret_val |= IA32_LEA_ATTR_BASE;
		}
		else
			return IA32_LEA_ATTR_NONE;
	}
	else if (! is_noreg_base && is_noreg_index) {
		/* Base is set but index is not */
		if (base == lea) {
			/* Base points to LEA: assimilate everything */
			if (am_flav & ia32_O)
				ret_val |= IA32_LEA_ATTR_OFFS;
			if (am_flav & ia32_S)
				ret_val |= IA32_LEA_ATTR_SCALE;
			if (am_flav & ia32_I)
				ret_val |= IA32_LEA_ATTR_INDEX;

			ret_val |= IA32_LEA_ATTR_BASE;
		}
		else if (am_flav & ia32_B ? 0 : 1) {
			/* Base is not the LEA but the LEA is an index-only calculation: assimilate */
			if (am_flav & ia32_O)
				ret_val |= IA32_LEA_ATTR_OFFS;
			if (am_flav & ia32_S)
				ret_val |= IA32_LEA_ATTR_SCALE;

			ret_val |= IA32_LEA_ATTR_INDEX;
		}
		else
			return IA32_LEA_ATTR_NONE;
	}
	else if (is_noreg_base && ! is_noreg_index) {
		/* Index is set but base is not */
		if (index == lea) {
			/* Index points to LEA: assimilate everything */
			if (am_flav & ia32_O)
				ret_val |= IA32_LEA_ATTR_OFFS;
			if (am_flav & ia32_S)
				ret_val |= IA32_LEA_ATTR_SCALE;
			if (am_flav & ia32_B)
				ret_val |= IA32_LEA_ATTR_BASE;

			ret_val |= IA32_LEA_ATTR_INDEX;
		}
		else if (am_flav & ia32_I ? 0 : 1) {
			/* Index is not the LEA but the LEA is a base-only calculation: assimilate */
			if (am_flav & ia32_O)
				ret_val |= IA32_LEA_ATTR_OFFS;
			if (am_flav & ia32_S)
				ret_val |= IA32_LEA_ATTR_SCALE;

			ret_val |= IA32_LEA_ATTR_BASE;
		}
		else
			return IA32_LEA_ATTR_NONE;
	}
	else {
		assert(0 && "base or index must be set");
	}

	return ret_val;
}
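/*
 * Illustrative sketch (not part of the original code): a LEA such as
 *
 *     lea eax, [ebx + ecx*4 + 8]
 *
 * carries base (ebx), index (ecx), scale (4) and offset (8). do_new_lea()
 * above computes which of these attributes an Add/Sub that uses the LEA may
 * absorb when it is itself converted into a LEA.
 */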
/**
 * Adds res before irn into the schedule if irn was scheduled.
 * @param irn  The schedule point
 * @param res  The node to be scheduled
 */
static INLINE void try_add_to_sched(ir_node *irn, ir_node *res) {
	if (sched_is_scheduled(irn))
		sched_add_before(irn, res);
}

/**
 * Removes a node from the schedule if it is not used anymore. If the node
 * is a mode_T node, all its Projs are removed as well.
 */
static INLINE void try_kill(ir_node *node)
{
	if(get_irn_mode(node) == mode_T) {
		const ir_edge_t *edge, *next;
		foreach_out_edge_safe(node, edge, next) {
			ir_node *proj = get_edge_src_irn(edge);
			try_kill(proj);
		}
	}

	if(get_irn_n_edges(node) != 0)
		return;

	if (sched_is_scheduled(node)) {
		sched_remove(node);
	}

	/* no users left: detach the node from the graph (conservative stand-in
	 * for the elided original cleanup code) */
	exchange(node, new_Bad());
}
/**
 * Folds Add or Sub to LEA if possible.
 */
static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn) {
	ir_graph   *irg        = get_irn_irg(irn);
	dbg_info   *dbg_info   = get_irn_dbg_info(irn);
	ir_node    *block      = get_nodes_block(irn);
	ir_node    *res        = irn;
	ir_node    *shift      = NULL;
	ir_node    *lea_o      = NULL;
	ir_node    *lea        = NULL;
	long        offs       = 0;
	long        offs_cnst  = 0;
	long        offs_lea   = 0;
	int         scale      = 0;
	int         isadd      = 0;
	int         dolea      = 0;
	int         have_am_sc = 0;
	int         am_sc_sign = 0;
	ir_entity  *am_sc      = NULL;
	ir_entity  *lea_ent    = NULL;
	ir_node    *noreg      = ia32_new_NoReg_gp(cg);
	ir_node    *left, *right, *temp;
	ir_node    *base, *index;
	int         consumed_left_shift;
	ia32_am_flavour_t am_flav;

	if (is_ia32_Add(irn))
		isadd = 1;

	left  = get_irn_n(irn, 2);
	right = get_irn_n(irn, 3);
702 /* "normalize" arguments in case of add with two operands */
703 if (isadd && ! be_is_NoReg(cg, right)) {
704 /* put LEA == ia32_am_O as right operand */
705 if (is_ia32_Lea(left) && get_ia32_am_flavour(left) == ia32_am_O) {
706 set_irn_n(irn, 2, right);
707 set_irn_n(irn, 3, left);
713 /* put LEA != ia32_am_O as left operand */
714 if (is_ia32_Lea(right) && get_ia32_am_flavour(right) != ia32_am_O) {
715 set_irn_n(irn, 2, right);
716 set_irn_n(irn, 3, left);
722 /* put SHL as left operand iff left is NOT a LEA */
723 if (! is_ia32_Lea(left) && pred_is_specific_node(right, is_ia32_Shl)) {
724 set_irn_n(irn, 2, right);
725 set_irn_n(irn, 3, left);
	/* check for operation with immediate */
	if (is_ia32_ImmConst(irn)) {
		tarval *tv = get_ia32_Immop_tarval(irn);

		DBG((dbg, LEVEL_1, "\tfound op with imm const"));
		offs_cnst = get_tarval_long(tv);
		dolea     = 1;
	}
	else if (isadd && is_ia32_ImmSymConst(irn)) {
		DBG((dbg, LEVEL_1, "\tfound op with imm symconst"));
		have_am_sc = 1;
		dolea      = 1;
		am_sc      = get_ia32_Immop_symconst(irn);
		am_sc_sign = is_ia32_am_sc_sign(irn);
	}

	/* determine the operand which needs to be checked */
	temp = be_is_NoReg(cg, right) ? left : right;

	/* check if the right operand is an AMConst (LEA with ia32_am_O):
	 * we can only eat it up if there is no other symconst,
	 * because the linker won't accept two symconsts */
	if (! have_am_sc && is_ia32_Lea(temp) && get_ia32_am_flavour(temp) == ia32_am_O) {
		DBG((dbg, LEVEL_1, "\tgot op with LEA am_O"));
		offs_lea   = get_ia32_am_offs_int(temp);
		am_sc      = get_ia32_am_sc(temp);
		am_sc_sign = is_ia32_am_sc_sign(temp);
		have_am_sc = 1;
		dolea      = 1;
		lea_o      = temp;

		if (temp == left)
			left = noreg;
		else if (temp == right)
			right = noreg;
	}

	base  = left;
	index = noreg;

	if (isadd) {
		/* default for Add: make the right operand the index */
		index               = right;
		dolea               = 1;
		consumed_left_shift = -1;

		DBG((dbg, LEVEL_1, "\tgot LEA candidate with index %+F\n", index));
		/* determine the operand which needs to be checked for a Shl */
		temp = left;
		if (is_ia32_Lea(left)) {
			temp                = right;
			consumed_left_shift = 0;
		}

		/* check for SHL 1,2,3 */
		if (pred_is_specific_node(temp, is_ia32_Shl)) {
			ir_node *right = get_irn_n(temp, n_ia32_Shl_right);

			if (is_ia32_Immediate(right)) {
				const ia32_immediate_attr_t *attr
					= get_ia32_immediate_attr_const(right);
				long shiftval = attr->offset;

				if (shiftval <= 3) {
					index               = get_irn_n(temp, 2);
					consumed_left_shift = consumed_left_shift < 0 ? 1 : 0;
					shift               = temp;
					scale               = shiftval;
					DBG((dbg, LEVEL_1, "\tgot scaled index %+F\n", index));
				}
			}
		}

		/* fix base */
		if (! be_is_NoReg(cg, index)) {
			/* if we have an index, but left == right -> no base */
			if (left == right) {
				base = noreg;
			}
			else if (consumed_left_shift == 1) {
				/* -> base is the right operand */
				base = (right == lea_o) ? noreg : right;
			}
		}
	}
	/* Try to assimilate a LEA as left operand */
	if (is_ia32_Lea(left) && (get_ia32_am_flavour(left) != ia32_am_O)) {
		/* check if we can assimilate the LEA */
		int take_attr = do_new_lea(irn, base, index, left, have_am_sc, cg);

		if (take_attr == IA32_LEA_ATTR_NONE) {
			DBG((dbg, LEVEL_1, "\tleave old LEA, creating new one\n"));
		} else {
			DBG((dbg, LEVEL_1, "\tgot LEA as left operand ... assimilating\n"));
			lea = left; /* for statistics */

			if (take_attr & IA32_LEA_ATTR_OFFS)
				offs = get_ia32_am_offs_int(left);

			if (take_attr & IA32_LEA_ATTR_AMSC) {
				am_sc      = get_ia32_am_sc(left);
				have_am_sc = 1;
				am_sc_sign = is_ia32_am_sc_sign(left);
			}

			if (take_attr & IA32_LEA_ATTR_SCALE)
				scale = get_ia32_am_scale(left);

			if (take_attr & IA32_LEA_ATTR_BASE)
				base = get_irn_n(left, 0);

			if (take_attr & IA32_LEA_ATTR_INDEX)
				index = get_irn_n(left, 1);

			if (take_attr & IA32_LEA_ATTR_FENT)
				lea_ent = get_ia32_frame_ent(left);
		}
	}
	/* if nothing LEA-worthy was found: leave the node alone */
	if (! dolea)
		return irn;

	/* ok, we can create a new LEA */
	res = new_rd_ia32_Lea(dbg_info, irg, block, base, index);

	/* we don't want stuff before the barrier... */
	if(be_is_NoReg(cg, base) && be_is_NoReg(cg, index)) {
		add_irn_dep(res, get_irg_frame(irg));
	}

	/* add the old offset of a previous LEA */
	add_ia32_am_offs_int(res, offs);

	/* add the new offset */
	if (isadd) {
		add_ia32_am_offs_int(res, offs_cnst);
		add_ia32_am_offs_int(res, offs_lea);
	} else {
		/* either lea_O-cnst, -cnst or -lea_O */
		if (offs_cnst != 0) {
			add_ia32_am_offs_int(res, offs_lea);
			add_ia32_am_offs_int(res, -offs_cnst);
		} else {
			add_ia32_am_offs_int(res, offs_lea);
		}
	}

	/* set the address mode symconst */
	if (have_am_sc) {
		set_ia32_am_sc(res, am_sc);
		if (am_sc_sign)
			set_ia32_am_sc_sign(res);
	}

	/* copy the frame entity (could be set in case of an Add
	 * which was a FrameAddr) */
	if (lea_ent != NULL) {
		set_ia32_frame_ent(res, lea_ent);
		set_ia32_use_frame(res);
	} else {
		set_ia32_frame_ent(res, get_ia32_frame_ent(irn));
		if(is_ia32_use_frame(irn))
			set_ia32_use_frame(res);
	}

	set_ia32_am_scale(res, scale);

	/* determine the new am flavour */
	am_flav = ia32_am_N;
	if (offs || offs_cnst || offs_lea || have_am_sc) {
		am_flav |= ia32_O;
	}
	if (! be_is_NoReg(cg, base)) {
		am_flav |= ia32_B;
	}
	if (! be_is_NoReg(cg, index)) {
		am_flav |= ia32_I;
	}
	if (scale > 0) {
		am_flav |= ia32_S;
	}
	set_ia32_am_flavour(res, am_flav);

	set_ia32_op_type(res, ia32_AddrModeS);

	SET_IA32_ORIG_NODE(res, ia32_get_old_node_name(cg, irn));
	DBG((dbg, LEVEL_1, "\tLEA [%+F + %+F * %d + %d]\n", base, index, scale, get_ia32_am_offs_int(res)));

	assert(irn && "Couldn't find result proj");

	/* add the new LEA to the schedule */
	try_add_to_sched(irn, res);

	/* we will exchange it, report here before the Proj is created */
	if (shift && lea && lea_o) {
		DBG_OPT_LEA4(irn, lea_o, lea, shift, res);
	} else if (shift && lea) {
		DBG_OPT_LEA3(irn, lea, shift, res);
	} else if (shift && lea_o) {
		DBG_OPT_LEA3(irn, lea_o, shift, res);
	} else if (lea && lea_o) {
		DBG_OPT_LEA3(irn, lea_o, lea, res);
	} else if (shift) {
		DBG_OPT_LEA2(irn, shift, res);
	} else if (lea) {
		DBG_OPT_LEA2(irn, lea, res);
	} else if (lea_o) {
		DBG_OPT_LEA2(irn, lea_o, res);
	} else {
		DBG_OPT_LEA1(irn, res);
	}

	/* exchange the old op with the new LEA */
	exchange(irn, res);
	return res;
}
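/*
 * Illustrative sketch (not part of the original code): fold_addr() above
 * rewrites arithmetic like
 *
 *     t = Shl(x, 2)     ; x * 4
 *     r = Add(y, t)
 * into
 *     lea r, [y + x*4]
 *
 * and can additionally absorb a constant offset, one symconst and an
 * existing LEA operand, as the attribute handling shows.
 */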
/**
 * Merges a Load/Store node with a LEA.
 *
 * @param irn  The Load/Store node
 * @param lea  The LEA
 */
static void merge_loadstore_lea(ir_node *irn, ir_node *lea) {
	ir_entity *irn_ent = get_ia32_frame_ent(irn);
	ir_entity *lea_ent = get_ia32_frame_ent(lea);

	/* If the irn and the LEA have different frame entities set: do not merge */
	if (irn_ent != NULL && lea_ent != NULL && (irn_ent != lea_ent))
		return;
	else if (irn_ent == NULL && lea_ent != NULL) {
		set_ia32_frame_ent(irn, lea_ent);
		set_ia32_use_frame(irn);
	}

	/* get the AM attributes from the LEA */
	add_ia32_am_offs_int(irn, get_ia32_am_offs_int(lea));
	set_ia32_am_scale(irn, get_ia32_am_scale(lea));
	set_ia32_am_flavour(irn, get_ia32_am_flavour(lea));

	set_ia32_am_sc(irn, get_ia32_am_sc(lea));
	if (is_ia32_am_sc_sign(lea))
		set_ia32_am_sc_sign(irn);

	set_ia32_op_type(irn, is_ia32_Ld(irn) ? ia32_AddrModeS : ia32_AddrModeD);

	/* set base and index */
	set_irn_n(irn, 0, get_irn_n(lea, 0));
	set_irn_n(irn, 1, get_irn_n(lea, 1));

	/* clear remat flag */
	set_ia32_flags(irn, get_ia32_flags(irn) & ~arch_irn_flags_rematerializable);

	if (is_ia32_Ld(irn))
		DBG_OPT_LOAD_LEA(lea, irn);
	else
		DBG_OPT_STORE_LEA(lea, irn);
}
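/*
 * Illustrative sketch (not part of the original code): merge_loadstore_lea()
 * above folds
 *
 *     lea eax, [ebx + ecx*2 + 4]
 *     mov edx, [eax]
 * into
 *     mov edx, [ebx + ecx*2 + 4]
 *
 * by copying the LEA's address attributes into the Load/Store.
 */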
/**
 * Sets the new_right index of irn to right and the new_left index to left.
 * Also exchanges left and right.
 */
static void exchange_left_right(ir_node *irn, ir_node **left, ir_node **right,
                                int new_left, int new_right)
{
	ir_node *temp;

	assert(is_ia32_commutative(irn));

	set_irn_n(irn, new_right, *right);
	set_irn_n(irn, new_left, *left);
	temp   = *left;
	*left  = *right;
	*right = temp;

	/* this is only needed for Compares, but currently ALL nodes
	 * have this attribute :-) */
	set_ia32_pncode(irn, get_inversed_pnc(get_ia32_pncode(irn)));
}
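/*
 * Illustrative note (not part of the original code): when the operands of a
 * Cmp are swapped, the projected relation has to be mirrored as well, e.g.
 * a < b becomes b > a; this is what the pncode update above performs.
 */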
/**
 * Performs address calculation optimization (creates LEAs if possible).
 */
static void optimize_lea(ia32_code_gen_t *cg, ir_node *irn) {
	if (! is_ia32_irn(irn))
		return;

	/* The following cases can occur:                                       */
	/* - Sub (l, imm)                       -> LEA [base - offset]          */
	/* - Sub (l, r == LEA with ia32_am_O)   -> LEA [base - offset]          */
	/* - Add (l, imm)                       -> LEA [base + offset]          */
	/* - Add (l, r == LEA with ia32_am_O)   -> LEA [base + offset]          */
	/* - Add (l == LEA with ia32_am_O, r)   -> LEA [base + offset]          */
	/* - Add (l, r)                         -> LEA [base + index * scale]   */
	/*   with scale > 1 iff l/r == shl (1,2,3)                              */
	if (is_ia32_Sub(irn) || is_ia32_Add(irn)) {
		ir_node *res;

		if(!is_addr_candidate(irn))
			return;

		DBG((dbg, LEVEL_1, "\tfound address calculation candidate %+F ... ", irn));
		res = fold_addr(cg, irn);

		if (res != irn)
			DB((dbg, LEVEL_1, "transformed into %+F\n", res));
		else
			DB((dbg, LEVEL_1, "not transformed\n"));
	} else if (is_ia32_Ld(irn) || is_ia32_St(irn)) {
		/* - Load  -> LEA into Load  } TODO: If the LEA is used by more than one Load/Store */
		/* - Store -> LEA into Store }       it might be better to keep the LEA             */
		ir_node *left = get_irn_n(irn, 0);

		if (is_ia32_Lea(left)) {
			const ir_edge_t *edge, *ne;
			ir_node *src;

			/* merge all Loads/Stores connected to this LEA with the LEA */
			foreach_out_edge_safe(left, edge, ne) {
				src = get_edge_src_irn(edge);

				if (src && (get_edge_src_pos(edge) == 0) && (is_ia32_Ld(src) || is_ia32_St(src))) {
					DBG((dbg, LEVEL_1, "\nmerging %+F into %+F\n", left, irn));
					if (! is_ia32_got_lea(src))
						merge_loadstore_lea(src, left);
					set_ia32_got_lea(src);
				}
			}
		}
	}
}
static void optimize_conv_store(ir_node *node)
{
	ir_node *pred;
	ir_mode *conv_mode;
	ir_mode *store_mode;

	if(!is_ia32_Store(node) && !is_ia32_Store8Bit(node))
		return;

	pred = get_irn_n(node, 2);
	if(!is_ia32_Conv_I2I(pred) && !is_ia32_Conv_I2I8Bit(pred))
		return;

	/* the Store only stores the lower bits, so we only need the Conv
	 * if it shrinks the mode */
	conv_mode  = get_ia32_ls_mode(pred);
	store_mode = get_ia32_ls_mode(node);
	if(get_mode_size_bits(conv_mode) < get_mode_size_bits(store_mode))
		return;

	set_irn_n(node, 2, get_irn_n(pred, 2));
	if(get_irn_n_edges(pred) == 0) {
		try_kill(pred);
	}
}
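/*
 * Illustrative sketch (not part of the original code): a Conv in front of a
 * Store is redundant when the Store writes at most as many bits as the Conv
 * produces, e.g.
 *
 *     eax' = Conv_I2I8Bit(eax)    ; truncate to 8 bits
 *     mov byte [p], eax'          ; Store8Bit
 *
 * stores only the low 8 bits anyway, so optimize_conv_store() above wires
 * the Store directly to the Conv's input.
 */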
static void optimize_load_conv(ir_node *node)
{
	ir_node *pred, *predpred;
	ir_mode *load_mode;
	ir_mode *conv_mode;

	if (!is_ia32_Conv_I2I(node) && !is_ia32_Conv_I2I8Bit(node))
		return;

	pred = get_irn_n(node, 2);
	if(!is_Proj(pred))
		return;

	predpred = get_Proj_pred(pred);
	if(!is_ia32_Load(predpred))
		return;

	/* the Load is already extending the upper bits, so we only need the
	 * Conv if it shrinks the mode */
	load_mode = get_ia32_ls_mode(predpred);
	conv_mode = get_ia32_ls_mode(node);
	if(get_mode_size_bits(conv_mode) < get_mode_size_bits(load_mode))
		return;

	if(get_mode_sign(conv_mode) != get_mode_sign(load_mode)) {
		/* change the Load's mode if it has only one user */
		if(get_irn_n_edges(pred) == 1) {
			ir_mode *newmode;
			if(get_mode_sign(conv_mode)) {
				newmode = find_signed_mode(load_mode);
			} else {
				newmode = find_unsigned_mode(load_mode);
			}
			assert(newmode != NULL);
			set_ia32_ls_mode(predpred, newmode);
		} else {
			/* otherwise we have to keep the Conv */
			return;
		}
	}

	exchange(node, pred);
}
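/*
 * Illustrative sketch (not part of the original code):
 *
 *     movsx eax, word [p]     ; Load, 16-bit ls_mode, extending
 *     eax' = Conv_I2I(eax)    ; 16 -> 32 bits
 *
 * The Load already extended the value to register width, so the Conv is
 * dropped; if only the signedness differs and the Load has a single user,
 * the Load's ls_mode is flipped instead.
 */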
static void optimize_conv_conv(ir_node *node)
{
	ir_node *pred_proj, *pred, *result_conv;
	ir_mode *pred_mode, *conv_mode;

	if (!is_ia32_Conv_I2I(node) && !is_ia32_Conv_I2I8Bit(node))
		return;

	assert(n_ia32_Conv_I2I_val == n_ia32_Conv_I2I8Bit_val);
	pred_proj = get_irn_n(node, n_ia32_Conv_I2I_val);
	if(is_Proj(pred_proj))
		pred = get_Proj_pred(pred_proj);
	else
		pred = pred_proj;

	if(!is_ia32_Conv_I2I(pred) && !is_ia32_Conv_I2I8Bit(pred))
		return;

	/* we know that after a conv, the upper bits are sign extended,
	 * so we only need the 2nd Conv if it shrinks the mode */
	conv_mode = get_ia32_ls_mode(node);
	pred_mode = get_ia32_ls_mode(pred);
	/* if the 2nd Conv is smaller than the 1st Conv, then we can always
	 * take the 2nd one */
	if(get_mode_size_bits(conv_mode) <= get_mode_size_bits(pred_mode)) {
		if(get_irn_n_edges(pred_proj) == 1) {
			result_conv = pred_proj;
			set_ia32_ls_mode(pred, conv_mode);
		} else {
			/* TODO: construct syncs/stuff here, but we'll probably end up
			 * with 2 statements anyway */
			if(get_irn_mode(pred) == mode_T) {
				return;
			}

			result_conv = exact_copy(pred);
			set_ia32_ls_mode(result_conv, conv_mode);
		}
	} else {
		/* if both Convs have the same sign, then we can take the smaller one */
		if(get_mode_sign(conv_mode) == get_mode_sign(pred_mode)) {
			result_conv = pred_proj;
		} else {
			/* no optimization possible if the smaller conv is sign-extending */
			if(mode_is_signed(pred_mode)) {
				return;
			}
			/* we can take the smaller conv if it is unsigned */
			result_conv = pred_proj;
		}
	}

	exchange(node, result_conv);

	if(get_irn_n_edges(pred) == 0) {
		try_kill(pred);
	}

	optimize_conv_conv(result_conv);
}
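/*
 * Illustrative sketch (not part of the original code): chained Convs can be
 * collapsed, e.g.
 *
 *     t = Conv_I2I(x)       ; 32 -> 16 bits
 *     r = Conv_I2I8Bit(t)   ; 16 ->  8 bits
 *
 * is equivalent to one 32 -> 8 bit Conv, because the second Conv shrinks
 * the mode further; optimize_conv_conv() above reuses or copies the first
 * Conv with the smaller ls_mode and then retries on the result.
 */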
static void optimize_node(ir_node *node, void *env)
{
	ia32_code_gen_t *cg = env;

	optimize_load_conv(node);
	optimize_conv_store(node);
	optimize_conv_conv(node);
	optimize_lea(cg, node);
}
/**
 * Checks for address mode patterns and performs the
 * necessary transformations.
 * This function is called by a walker.
 */
static void optimize_am(ir_node *irn, void *env) {
	ia32_am_opt_env_t *am_opt_env = env;
	ia32_code_gen_t   *cg         = am_opt_env->cg;
	ir_graph          *irg        = get_irn_irg(irn);
	heights_t         *h          = am_opt_env->h;
	ir_node           *block, *left, *right;
	ir_node           *store, *load, *mem_proj;
	ir_node           *addr_b, *addr_i;
	int                need_exchange_on_fail = 0;
	ia32_am_type_t     am_support;
	ia32_am_arity_t    am_arity;
	ia32_am_cand_t     cand;
	ia32_am_cand_t     orig_cand;
	int                dest_possible;
	int                source_possible;

	static const arch_register_req_t dest_out_reg_req_0 = {
		arch_register_req_type_none,
		NULL,  /* regclass */
		NULL,  /* limit bitset */
		-1,    /* same pos */
		-1     /* different pos */
	};
	static const arch_register_req_t *dest_am_out_reqs[] = {
		&dest_out_reg_req_0
	};
	if (!is_ia32_irn(irn) || is_ia32_Ld(irn) || is_ia32_St(irn))
		return;
	if (is_ia32_Lea(irn))
		return;

	am_support = get_ia32_am_support(irn);
	am_arity   = get_ia32_am_arity(irn);
	block      = get_nodes_block(irn);

	/* fold following patterns:
	 * - op -> Load into AMop with am_Source
	 *   conditions:
	 *     - op is am_Source capable AND
	 *     - the Load is only used by this op AND
	 *     - the Load is in the same block
	 * - Store -> op -> Load into AMop with am_Dest
	 *   conditions:
	 *     - op is am_Dest capable AND
	 *     - the Store uses the same address as the Load AND
	 *     - the Load is only used by this op AND
	 *     - the Load and Store are in the same block AND
	 *     - nobody else uses the result of the op
	 */
	if (am_support == ia32_am_None)
		return;

	assert(am_arity == ia32_am_unary || am_arity == ia32_am_binary);

	cand = is_am_candidate(h, block, irn);
	if (cand == IA32_AM_CAND_NONE)
		return;

	orig_cand = cand;
	DBG((dbg, LEVEL_1, "\tfound address mode candidate %+F (candleft %d candright %d) ...\n", irn,
	     cand & IA32_AM_CAND_LEFT, cand & IA32_AM_CAND_RIGHT));

	left = get_irn_n(irn, 2);
	if (am_arity == ia32_am_unary) {
		assert(get_irn_arity(irn) >= 4);
		right = left;
		assert(cand == IA32_AM_CAND_BOTH);
	} else {
		assert(get_irn_arity(irn) >= 5);
		right = get_irn_n(irn, 3);
	}

	dest_possible   = am_support & ia32_am_Dest ? 1 : 0;
	source_possible = am_support & ia32_am_Source ? 1 : 0;

	DBG((dbg, LEVEL_2, "\tdest_possible %d source_possible %d ...\n", dest_possible, source_possible));
	if (dest_possible) {
		store = NULL;

		/* we should only have 1 user, which is a Store */
		if (ia32_get_irn_n_edges(irn) == 1) {
			ir_node *succ = get_edge_src_irn(get_irn_out_edge_first(irn));

			if (is_ia32_xStore(succ) || is_ia32_Store(succ)) {
				store  = succ;
				addr_b = get_irn_n(store, 0);
				addr_i = get_irn_n(store, 1);
			}
		}

		if (store == NULL) {
			DBG((dbg, LEVEL_2, "\tno store found, not using dest_mode\n"));
			dest_possible = 0;
		}
	}

	if (dest_possible) {
		/* normalize nodes, we need the interesting Load on the left side */
		if (cand & IA32_AM_CAND_RIGHT) {
			load = get_Proj_pred(right);
			if (load_store_addr_is_equal(load, store, addr_b, addr_i)
					&& node_is_ia32_comm(irn)) {
				DBG((dbg, LEVEL_2, "\texchanging left/right\n"));
				exchange_left_right(irn, &left, &right, 3, 2);
				need_exchange_on_fail ^= 1;
				if (cand == IA32_AM_CAND_RIGHT)
					cand = IA32_AM_CAND_LEFT;
			}
		}
	}
	if (dest_possible) {
		if (cand & IA32_AM_CAND_LEFT && is_Proj(left)) {
			load = get_Proj_pred(left);

#ifndef AGGRESSIVE_AM
			/* we have to be the only user of the Load */
			if (get_irn_n_edges(left) > 1) {
				DBG((dbg, LEVEL_2, "\tmatching load has too many users, not using dest_mode\n"));
				dest_possible = 0;
			}
#endif
		} else {
			DBG((dbg, LEVEL_2, "\tno matching load found, not using dest_mode\n"));
			dest_possible = 0;
		}
	}

	if (dest_possible) {
		/* the Store has to use the Load's memory or the same memory as the Load */
		ir_node *loadmem  = get_irn_n(load, 2);
		ir_node *storemem = get_irn_n(store, 3);
		assert(get_irn_mode(loadmem) == mode_M);
		assert(get_irn_mode(storemem) == mode_M);
		/* TODO: there could be a Sync between Store and Load... */
		if(storemem != loadmem && (!is_Proj(storemem) || get_Proj_pred(storemem) != load)) {
			DBG((dbg, LEVEL_2, "\tload/store using different memories, not using dest_mode\n"));
			dest_possible = 0;
		}
	}

	if (dest_possible) {
		/* Compare Load and Store address */
		if (!load_store_addr_is_equal(load, store, addr_b, addr_i)) {
			DBG((dbg, LEVEL_2, "\taddresses not equal, not using dest_mode\n"));
			dest_possible = 0;
		}
	}

	if (dest_possible) {
		ir_mode *lsmode = get_ia32_ls_mode(load);
		if(get_mode_size_bits(lsmode) != 32) {
			dest_possible = 0;
		}
	}
	if (dest_possible) {
		/* all conditions fulfilled, do the transformation */
		assert(cand & IA32_AM_CAND_LEFT);

		/* set new base, index and attributes */
		set_irn_n(irn, 0, addr_b);
		set_irn_n(irn, 1, addr_i);
		add_ia32_am_offs_int(irn, get_ia32_am_offs_int(load));
		set_ia32_am_scale(irn, get_ia32_am_scale(load));
		set_ia32_am_flavour(irn, get_ia32_am_flavour(load));
		set_ia32_op_type(irn, ia32_AddrModeD);
		set_ia32_frame_ent(irn, get_ia32_frame_ent(load));
		set_ia32_ls_mode(irn, get_ia32_ls_mode(load));

		set_ia32_am_sc(irn, get_ia32_am_sc(load));
		if (is_ia32_am_sc_sign(load))
			set_ia32_am_sc_sign(irn);

		/* connect to Load memory and disconnect Load */
		if (am_arity == ia32_am_binary) {
			/* binary AMop */
			set_irn_n(irn, 4, get_irn_n(load, 2));
			set_irn_n(irn, 2, ia32_get_admissible_noreg(cg, irn, 2));
		} else {
			/* unary AMop */
			set_irn_n(irn, 3, get_irn_n(load, 2));
			set_irn_n(irn, 2, ia32_get_admissible_noreg(cg, irn, 2));
		}

		/* change node mode and out register requirements */
		set_irn_mode(irn, mode_M);
		set_ia32_out_req_all(irn, dest_am_out_reqs);

		/* connect the memory Proj of the Store to the op */
		edges_reroute(store, irn, irg);

		/* clear remat flag */
		set_ia32_flags(irn, get_ia32_flags(irn) & ~arch_irn_flags_rematerializable);

		/* Load and Store are dead now */
		try_kill(store);
		try_kill(load);

		DBG_OPT_AM_D(load, store, irn);

		DB((dbg, LEVEL_1, "merged with %+F and %+F into dest AM\n", load, store));

		need_exchange_on_fail = 0;
		source_possible       = 0;
	}
	if (source_possible) {
		/* normalize ops, we need the Load on the right */
		if(cand == IA32_AM_CAND_LEFT) {
			if(node_is_ia32_comm(irn)) {
				exchange_left_right(irn, &left, &right, 3, 2);
				need_exchange_on_fail ^= 1;
				cand = IA32_AM_CAND_RIGHT;
			} else {
				source_possible = 0;
			}
		}
	}

	if (source_possible) {
		/* candidate found, check the Load */
		assert(cand & IA32_AM_CAND_RIGHT);
		load = get_Proj_pred(right);

		if(get_irn_n_edges(right) > 1) {
			source_possible = 0;
		}

		/* TODO: this isn't really needed, but the code below is buggy,
		   as heights won't get recomputed when the graph is reconstructed,
		   so we can only transform Loads with the result Proj only */
		if(get_irn_n_edges(load) > 1) {
			source_possible = 0;
		}
	}

	if (source_possible) {
		ir_mode *ls_mode = get_ia32_ls_mode(load);
		if(get_mode_size_bits(ls_mode) != 32 || ls_mode == mode_D)
			source_possible = 0;
	}
	if (source_possible) {
		const ia32_attr_t *attr_load = get_ia32_attr_const(load);
		ia32_attr_t       *attr_irn  = get_ia32_attr(irn);

		addr_b = get_irn_n(load, 0);
		addr_i = get_irn_n(load, 1);

		/* set new base, index and attributes */
		set_irn_n(irn, 0, addr_b);
		set_irn_n(irn, 1, addr_i);
		add_ia32_am_offs_int(irn, get_ia32_am_offs_int(load));
		set_ia32_am_scale(irn, get_ia32_am_scale(load));
		set_ia32_am_flavour(irn, get_ia32_am_flavour(load));
		set_ia32_op_type(irn, ia32_AddrModeS);
		set_ia32_frame_ent(irn, get_ia32_frame_ent(load));
		attr_irn->data.need_64bit_stackent
			= attr_load->data.need_64bit_stackent;
		attr_irn->data.need_32bit_stackent
			= attr_load->data.need_32bit_stackent;

		/* set ls_mode if not already present (Conv nodes already have their
		 * ls_mode set) */
		if(get_ia32_ls_mode(irn) == NULL) {
			set_ia32_ls_mode(irn, get_ia32_ls_mode(load));
		}

		set_ia32_am_sc(irn, get_ia32_am_sc(load));
		if (is_ia32_am_sc_sign(load))
			set_ia32_am_sc_sign(irn);

		/* clear remat flag */
		set_ia32_flags(irn, get_ia32_flags(irn) & ~arch_irn_flags_rematerializable);

		if (is_ia32_use_frame(load)) {
			if(get_ia32_frame_ent(load) == NULL) {
				set_ia32_need_stackent(irn);
			}
			set_ia32_use_frame(irn);
		}

		/* connect to Load memory and disconnect Load */
		if (am_arity == ia32_am_binary) {
			/* binary AMop */
			right = ia32_get_admissible_noreg(cg, irn, 3);
			set_irn_n(irn, 3, right);
			set_irn_n(irn, 4, get_irn_n(load, n_ia32_Load_mem));
		} else {
			/* unary AMop */
			right = ia32_get_admissible_noreg(cg, irn, 2);
			set_irn_n(irn, 2, right);
			set_irn_n(irn, 3, get_irn_n(load, n_ia32_Load_mem));
		}
		DBG_OPT_AM_S(load, irn);

		/* if the Load has a memory Proj, connect it to the op */
		mem_proj = ia32_get_proj_for_mode(load, mode_M);
		if (mem_proj != NULL) {
			ir_node *res_proj;
			ir_mode *mode = get_irn_mode(irn);

			if(mode != mode_T) {
				res_proj = new_rd_Proj(get_irn_dbg_info(irn), irg,
				                       get_nodes_block(irn),
				                       new_Unknown(mode_T), mode, 0);
				set_irn_mode(irn, mode_T);
				edges_reroute(irn, res_proj, irg);
				set_Proj_pred(res_proj, irn);

				set_Proj_pred(mem_proj, irn);
				set_Proj_proj(mem_proj, 1);
			} else {
				/* hacky: we need some Proj number which is not used yet... */
				set_Proj_proj(mem_proj, -1);
				set_Proj_pred(mem_proj, irn);
			}
		}

		need_exchange_on_fail = 0;

		/* immediates are only allowed on the right side */
		if(is_ia32_Immediate(left)) {
			exchange_left_right(irn, &left, &right, 3, 2);
		}

		DB((dbg, LEVEL_1, "merged with %+F into source AM\n", load));
	}

	/* was exchanged but the optimization failed: exchange back */
	if (need_exchange_on_fail) {
		exchange_left_right(irn, &left, &right, 3, 2);
	}
}
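/*
 * Illustrative sketch (not part of the original code): the two foldings done
 * by optimize_am() above correspond to
 *
 *     source AM:  mov eax, [p]          =>  add ebx, [p]
 *                 add ebx, eax
 *
 *     dest AM:    mov eax, [p]          =>  add [p], ebx
 *                 add eax, ebx
 *                 mov [p], eax
 *
 * Dest mode additionally requires that the Store writes back to exactly the
 * address the Load read from and that nobody else uses the op's result.
 */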
/**
 * Performs Conv and address mode optimization.
 */
void ia32_optimize_graph(ia32_code_gen_t *cg) {
	/* if we are not supposed to do AM or LEA optimization: exit early */
	if (! (cg->opt & (IA32_OPT_DOAM | IA32_OPT_LEA))) {
		/* no optimizations at all */
		return;
	}

	/* beware: we cannot optimize LEA and AM in one run, because
	 *         LEA optimization adds new nodes to the irg which
	 *         invalidates the phase data */

	if (cg->opt & IA32_OPT_LEA) {
		irg_walk_blkwise_graph(cg->irg, NULL, optimize_node, cg);
	}

	if (cg->dump)
		be_dump(cg->irg, "-lea", dump_ir_block_graph_sched);

	/* hack for now: create these here, so they don't get created during the
	 * optimization, because then they would be unknown to the heights module */
	ia32_new_NoReg_gp(cg);
	ia32_new_NoReg_fp(cg);
	ia32_new_NoReg_vfp(cg);

	if (cg->opt & IA32_OPT_DOAM) {
		/* we need height information for AM optimization */
		heights_t *h = heights_new(cg->irg);
		ia32_am_opt_env_t env;

		env.cg = cg;
		env.h  = h;

		irg_walk_blkwise_graph(cg->irg, NULL, optimize_am, &env);

		heights_free(h);
	}
}
void ia32_init_optimize(void)
{
	FIRM_DBG_REGISTER(dbg, "firm.be.ia32.optimize");
}