nsz Git - libfirm/blob - ir/ir/irarch.c

   1 /*
   2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
   3  *
   4  * This file is part of libFirm.
   5  *
   6  * This file may be distributed and/or modified under the terms of the
   7  * GNU General Public License version 2 as published by the Free Software
   8  * Foundation and appearing in the file LICENSE.GPL included in the
   9  * packaging of this file.
  10  *
  11  * Licensees holding valid libFirm Professional Edition licenses may use
  12  * this file in accordance with the libFirm Commercial License.
  13  * Agreement provided with the Software.
  14  *
  15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
  16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17  * PURPOSE.
  18  */
  19
  20 /**
  21  * @file
  22  * @brief   Machine dependent Firm optimizations.
  23  * @date    28.9.2004
  24  * @author  Sebastian Hack, Michael Beck
  25  * @version $Id$
  26  */
  27 #ifdef HAVE_CONFIG_H
  28 # include "config.h"
  29 #endif
  30
  31 #ifdef HAVE_STDLIB_H
  32 # include <stdlib.h>
  33 #endif
  34
  35 #include <assert.h>
  36
  37 #include "irnode_t.h"
  38 #include "irgraph_t.h"
  39 #include "irmode_t.h"
  40 #include "iropt_t.h"
  41 #include "ircons_t.h"
  42 #include "irgmod.h"
  43 #include "irvrfy.h"
  44 #include "tv_t.h"
  45 #include "dbginfo_t.h"
  46 #include "iropt_dbg.h"
  47 #include "irflag_t.h"
  48 #include "irhooks.h"
  49 #include "ircons.h"
  50 #include "irarch.h"
  51 #include "irreflect.h"
  52
  53 #undef DEB
  54
  55 #define MAX_BITSTR 64
  56
  57 /* when we need verifying */
  58 #ifdef NDEBUG
  59 # define IRN_VRFY_IRG(res, irg)
  60 #else
  61 # define IRN_VRFY_IRG(res, irg)  irn_vrfy_irg(res, irg)
  62 #endif
  63
  64 /** The params got from the factory in arch_dep_init(...). */
  65 static const arch_dep_params_t *params = NULL;
  66
  67 /** The bit mask, which optimizations to apply. */
  68 static arch_dep_opts_t opts;
  69
  70 /* we need this new pseudo op */
  71 static ir_op *op_Mulh = NULL;
  72
  73 /**
  74  * construct a Mulh: Mulh(a,b) = (a * b) >> w, w is the with in bits of a, b
  75  */
  76 static ir_node *
  77 new_rd_Mulh (dbg_info *db, ir_graph *irg, ir_node *block,
  78        ir_node *op1, ir_node *op2, ir_mode *mode)
  79 {
  80   ir_node *in[2];
  81   ir_node *res;
  82
  83   in[0] = op1;
  84   in[1] = op2;
  85   res = new_ir_node(db, irg, block, op_Mulh, mode, 2, in);
  86   res = optimize_node(res);
  87   IRN_VRFY_IRG(res, irg);
  88   return res;
  89 }
  90
  91 ir_op *get_op_Mulh(void)  { return op_Mulh; }
  92
  93 void arch_dep_init(arch_dep_params_factory_t factory)
  94 {
  95   opts = arch_dep_none;
  96
  97   if (factory != NULL)
  98     params = factory();
  99
 100   if (! op_Mulh) {
 101     rflct_sig_t *sig;
 102     int mulh_opc = get_next_ir_opcode();
 103
 104     /* create the Mulh operation */
 105     op_Mulh = new_ir_op(mulh_opc, "Mulh",  op_pin_state_floats, irop_flag_commutative, oparity_binary, 0, 0, NULL);
 106     sig = rflct_signature_allocate(1, 3);
 107     rflct_signature_set_arg(sig, 0, 0, "Res", RFLCT_MC(Int), 0, 0);
 108     rflct_signature_set_arg(sig, 1, 0, "Block", RFLCT_MC(BB), 0, 0);
 109     rflct_signature_set_arg(sig, 1, 1, "Op 0", RFLCT_MC(Int), 0, 0);
 110     rflct_signature_set_arg(sig, 1, 2, "Op 1", RFLCT_MC(Int), 0, 0);
 111
 112     rflct_new_opcode(mulh_opc, "Mulh", 0);
 113     rflct_opcode_add_signature(mulh_opc, sig);
 114   }
 115 }
 116
 117 void arch_dep_set_opts(arch_dep_opts_t the_opts) {
 118   opts = the_opts;
 119 }
 120
 121 /** check, whether a mode allows a Mulh instruction. */
 122 static int allow_Mulh(ir_mode *mode)
 123 {
 124   if (get_mode_size_bits(mode) > params->max_bits_for_mulh)
 125     return 0;
 126   return (mode_is_signed(mode) && params->allow_mulhs) || (!mode_is_signed(mode) && params->allow_mulhu);
 127 }
 128
 129 /* Replace Muls with Shifts and Add/Subs. */
 130 ir_node *arch_dep_replace_mul_with_shifts(ir_node *irn)
 131 {
 132   ir_node *res = irn;
 133   ir_mode *mode = get_irn_mode(irn);
 134
 135   /* If the architecture dependent optimizations were not initialized
 136      or this optimization was not enabled. */
 137   if (params == NULL || (opts & arch_dep_mul_to_shift) == 0)
 138     return irn;
 139
 140   if (get_irn_op(irn) == op_Mul && mode_is_int(mode)) {
 141     ir_node *block   = get_irn_n(irn, -1);
 142     ir_node *left    = get_binop_left(irn);
 143     ir_node *right   = get_binop_right(irn);
 144     tarval *tv       = NULL;
 145     ir_node *operand = NULL;
 146
 147     /* Look, if one operand is a constant. */
 148     if (get_irn_opcode(left) == iro_Const) {
 149       tv = get_Const_tarval(left);
 150       operand = right;
 151     } else if(get_irn_opcode(right) == iro_Const) {
 152       tv = get_Const_tarval(right);
 153       operand = left;
 154     }
 155
 156     if (tv != NULL) {
 157       int maximum_shifts = params->maximum_shifts;
 158       int also_use_subs = params->also_use_subs;
 159       int highest_shift_amount = params->highest_shift_amount;
 160
 161       char *bitstr = get_tarval_bitpattern(tv);
 162       char *p;
 163       int i, last = 0;
 164       int counter = 0;
 165       int curr_bit;
 166       int compr_len = 0;
 167       char compr[MAX_BITSTR];
 168
 169       int singleton;
 170       int end_of_group;
 171       int shift_with_sub[MAX_BITSTR] = { 0 };
 172       int shift_without_sub[MAX_BITSTR] = { 0 };
 173       int shift_with_sub_pos = 0;
 174       int shift_without_sub_pos = 0;
 175
 176 #if DEB
 177       {
 178         long val = get_tarval_long(tv);
 179         fprintf(stderr, "Found mul with %ld(%lx) = ", val, val);
 180         for(p = bitstr; *p != '\0'; p++)
 181           printf("%c", *p);
 182         printf("\n");
 183       }
 184 #endif
 185
 186       for(p = bitstr; *p != '\0'; p++) {
 187         int bit = *p != '0';
 188
 189         if (bit != last) {
 190           /* The last was 1 we are now at 0 OR
 191            * The last was 0 and we are now at 1 */
 192           compr[compr_len++] = counter;
 193           counter = 1;
 194         }
 195         else
 196           counter++;
 197
 198         last = bit;
 199       }
 200       compr[compr_len++] = counter;
 201
 202
 203 #ifdef DEB
 204       {
 205         const char *prefix = "";
 206         for(i = 0; i < compr_len; i++, prefix = ",")
 207           fprintf(stderr, "%s%d", prefix, compr[i]);
 208         fprintf("\n");
 209       }
 210 #endif
 211
 212       /* Go over all recorded one groups. */
 213       curr_bit = compr[0];
 214
 215       for(i = 1; i < compr_len; i = end_of_group + 2) {
 216         int j, zeros_in_group, ones_in_group;
 217
 218         ones_in_group = compr[i];
 219         zeros_in_group = 0;
 220
 221         /* Scan for singular 0s in a sequence. */
 222         for(j = i + 1; j < compr_len && compr[j] == 1; j += 2) {
 223           zeros_in_group += 1;
 224           ones_in_group += (j + 1 < compr_len ? compr[j + 1] : 0);
 225         }
 226         end_of_group = j - 1;
 227
 228         if(zeros_in_group >= ones_in_group - 1)
 229           end_of_group = i;
 230
 231 #ifdef DEB
 232         fprintf(stderr, "  i:%d, eg:%d\n", i, end_of_group);
 233 #endif
 234
 235         singleton = compr[i] == 1 && i == end_of_group;
 236         for(j = i; j <= end_of_group; j += 2) {
 237           int curr_ones = compr[j];
 238           int biased_curr_bit = curr_bit + 1;
 239           int k;
 240
 241 #ifdef DEB
 242           fprintf(stderr, "    j:%d, ones:%d\n", j, curr_ones);
 243 #endif
 244
 245           /* If this ones group is a singleton group (it has no
 246              singleton zeros inside. */
 247           if(singleton)
 248             shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
 249           else if(j == i)
 250             shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
 251
 252           for(k = 0; k < curr_ones; k++)
 253             shift_without_sub[shift_without_sub_pos++] = biased_curr_bit + k;
 254
 255           curr_bit += curr_ones;
 256           biased_curr_bit = curr_bit + 1;
 257
 258           if(!singleton && j == end_of_group)
 259             shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
 260           else if(j != end_of_group)
 261             shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
 262
 263           curr_bit += compr[j + 1];
 264         }
 265
 266       }
 267
 268       {
 269         int *shifts = shift_with_sub;
 270         int n = shift_with_sub_pos;
 271         int highest_shift_wide = 0;
 272         int highest_shift_seq = 0;
 273         int last_shift = 0;
 274
 275         /* If we may not use subs, or we can achive the same with adds,
 276            prefer adds. */
 277         if(!also_use_subs || shift_with_sub_pos >= shift_without_sub_pos) {
 278           shifts = shift_without_sub;
 279           n = shift_without_sub_pos;
 280         }
 281
 282         /* If the number of needed shifts exceeds the given maximum,
 283            use the Mul and exit. */
 284         if(n > maximum_shifts) {
 285 #ifdef DEB
 286           fprintf(stderr, "Only allowed %d shifts, but %d are needed\n",
 287               maximum_shifts, n);
 288 #endif
 289           return irn;
 290         }
 291
 292         /* Compute the highest shift needed for both, the
 293            sequential and wide representations. */
 294         for(i = 0; i < n; i++) {
 295           int curr = abs(shifts[i]);
 296           int curr_seq = curr - last;
 297
 298           highest_shift_wide = curr > highest_shift_wide ? curr
 299             : highest_shift_wide;
 300           highest_shift_seq = curr_seq > highest_shift_seq ? curr_seq
 301             : highest_shift_seq;
 302
 303           last_shift = curr;
 304         }
 305
 306         /* If the highest shift amount is greater than the given limit,
 307            give back the Mul */
 308         if(highest_shift_seq > highest_shift_amount) {
 309 #ifdef DEB
 310           fprintf(stderr, "Shift argument %d exceeds maximum %d\n",
 311               highest_shift_seq, highest_shift_amount);
 312 #endif
 313           return irn;
 314         }
 315
 316         /* If we have subs, we cannot do sequential. */
 317         if(1 /* also_use_subs */) {
 318           if(n > 0) {
 319             ir_node *curr = NULL;
 320
 321             i = n - 1;
 322
 323             do {
 324               int curr_shift = shifts[i];
 325               int sub = curr_shift < 0;
 326               int amount = abs(curr_shift) - 1;
 327               ir_node *aux = operand;
 328
 329               assert(amount >= 0 && "What is a negative shift??");
 330
 331               if (amount != 0) {
 332                 ir_node *cnst = new_r_Const_long(current_ir_graph, block, mode_Iu, amount);
 333                 aux = new_r_Shl(current_ir_graph, block, operand, cnst, mode);
 334               }
 335
 336               if (curr) {
 337                 if (sub)
 338                   curr = new_r_Sub(current_ir_graph, block, curr, aux, mode);
 339                 else
 340                   curr = new_r_Add(current_ir_graph, block, curr, aux, mode);
 341               } else
 342                 curr = aux;
 343
 344             } while(--i >= 0);
 345
 346             res = curr;
 347           }
 348         }
 349
 350 #ifdef DEB
 351         {
 352           const char *prefix = "";
 353           for (i = 0; i < n; ++i) {
 354             fprintf(stderr, "%s%d", prefix, shifts[i]);
 355             prefix = ", ";
 356           }
 357           fprintf(stderr, "\n");
 358         }
 359 #endif
 360
 361       }
 362
 363       if(bitstr)
 364         free(bitstr);
 365     }
 366
 367   }
 368
 369   if (res != irn)
 370     hook_arch_dep_replace_mul_with_shifts(irn);
 371
 372   return res;
 373 }
 374
 375 /**
 376  * calculated the ld2 of a tarval if tarval is 2^n, else returns -1.
 377  */
 378 static int tv_ld2(tarval *tv, int bits)
 379 {
 380   int i, k = 0, num;
 381
 382   for (num = i = 0; i < bits; ++i) {
 383     unsigned char v = get_tarval_sub_bits(tv, i);
 384
 385     if (v) {
 386       int j;
 387
 388       for (j = 0; j < 8; ++j)
 389         if ((1 << j) & v) {
 390           ++num;
 391           k = 8 * i + j;
 392         }
 393     }
 394   }
 395   if (num == 1)
 396     return k;
 397   return -1;
 398 }
 399
 400
 401 /* for shorter lines */
 402 #define ABS(a)    tarval_abs(a)
 403 #define NEG(a)    tarval_neg(a)
 404 #define NOT(a)    tarval_not(a)
 405 #define SHL(a, b) tarval_shl(a, b)
 406 #define SHR(a, b) tarval_shr(a, b)
 407 #define ADD(a, b) tarval_add(a, b)
 408 #define SUB(a, b) tarval_sub(a, b)
 409 #define MUL(a, b) tarval_mul(a, b)
 410 #define DIV(a, b) tarval_div(a, b)
 411 #define MOD(a, b) tarval_mod(a, b)
 412 #define CMP(a, b) tarval_cmp(a, b)
 413 #define CNV(a, m) tarval_convert_to(a, m)
 414 #define ONE(m)    get_mode_one(m)
 415 #define ZERO(m)   get_mode_null(m)
 416
 417 /** The result of a the magic() function. */
 418 struct ms {
 419   tarval *M;        /**< magic number */
 420   int s;            /**< shift amount */
 421   int need_add;     /**< an additional add is needed */
 422   int need_sub;     /**< an additional sub is needed */
 423 };
 424
 425 /**
 426  * Signed division by constant d: calculate the Magic multiplier M and the shift amount s
 427  *
 428  * see Hacker's Delight: 10-6 Integer Division by Constants: Incorporation into a Compiler
 429  */
 430 static struct ms magic(tarval *d)
 431 {
 432   ir_mode *mode   = get_tarval_mode(d);
 433   ir_mode *u_mode = find_unsigned_mode(mode);
 434   int bits        = get_mode_size_bits(u_mode);
 435   int p;
 436   tarval *ad, *anc, *delta, *q1, *r1, *q2, *r2, *t;     /* unsigned */
 437   pn_Cmp d_cmp, M_cmp;
 438
 439   tarval *bits_minus_1, *two_bits_1;
 440
 441   struct ms mag;
 442
 443   tarval_int_overflow_mode_t rem = tarval_get_integer_overflow_mode();
 444
 445   /* we need overflow mode to work correctly */
 446   tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
 447
 448   /* 2^(bits-1) */
 449   bits_minus_1 = new_tarval_from_long(bits - 1, u_mode);
 450   two_bits_1   = SHL(get_mode_one(u_mode), bits_minus_1);
 451
 452   ad  = CNV(ABS(d), u_mode);
 453   t   = ADD(two_bits_1, SHR(CNV(d, u_mode), bits_minus_1));
 454   anc = SUB(SUB(t, ONE(u_mode)), MOD(t, ad));   /* Absolute value of nc */
 455   p   = bits - 1;                               /* Init: p */
 456   q1  = DIV(two_bits_1, anc);                   /* Init: q1 = 2^p/|nc| */
 457   r1  = SUB(two_bits_1, MUL(q1, anc));          /* Init: r1 = rem(2^p, |nc|) */
 458   q2  = DIV(two_bits_1, ad);                    /* Init: q2 = 2^p/|d| */
 459   r2  = SUB(two_bits_1, MUL(q2, ad));           /* Init: r2 = rem(2^p, |d|) */
 460
 461   do {
 462     ++p;
 463     q1 = ADD(q1, q1);                           /* Update q1 = 2^p/|nc| */
 464     r1 = ADD(r1, r1);                           /* Update r1 = rem(2^p, |nc|) */
 465
 466     if (CMP(r1, anc) & pn_Cmp_Ge) {
 467       q1 = ADD(q1, ONE(u_mode));
 468       r1 = SUB(r1, anc);
 469     }
 470
 471     q2 = ADD(q2, q2);                           /* Update q2 = 2^p/|d| */
 472     r2 = ADD(r2, r2);                           /* Update r2 = rem(2^p, |d|) */
 473
 474     if (CMP(r2, ad) & pn_Cmp_Ge) {
 475       q2 = ADD(q2, ONE(u_mode));
 476       r2 = SUB(r2, ad);
 477     }
 478
 479     delta = SUB(ad, r2);
 480   } while (CMP(q1, delta) & pn_Cmp_Lt || (CMP(q1, delta) & pn_Cmp_Eq && CMP(r1, ZERO(u_mode)) & pn_Cmp_Eq));
 481
 482   d_cmp = CMP(d, ZERO(mode));
 483
 484   if (d_cmp & pn_Cmp_Ge)
 485     mag.M = ADD(CNV(q2, mode), ONE(mode));
 486   else
 487     mag.M = SUB(ZERO(mode), ADD(CNV(q2, mode), ONE(mode)));
 488
 489   M_cmp = CMP(mag.M, ZERO(mode));
 490
 491   mag.s = p - bits;
 492
 493   /* need an add if d > 0 && M < 0 */
 494   mag.need_add = d_cmp & pn_Cmp_Gt && M_cmp & pn_Cmp_Lt;
 495
 496   /* need a sub if d < 0 && M > 0 */
 497   mag.need_sub = d_cmp & pn_Cmp_Lt && M_cmp & pn_Cmp_Gt;
 498
 499   tarval_set_integer_overflow_mode(rem);
 500
 501   return mag;
 502 }
 503
 504 /** The result of the magicu() function. */
 505 struct mu {
 506   tarval *M;        /**< magic add constant */
 507   int s;            /**< shift amount */
 508   int need_add;     /**< add indicator */
 509 };
 510
 511 /**
 512  * Unsigned division by constant d: calculate the Magic multiplier M and the shift amount s
 513  *
 514  * see Hacker's Delight: 10-10 Integer Division by Constants: Incorporation into a Compiler (Unsigned)
 515  */
 516 static struct mu magicu(tarval *d)
 517 {
 518   ir_mode *mode   = get_tarval_mode(d);
 519   int bits        = get_mode_size_bits(mode);
 520   int p;
 521   tarval *nc, *delta, *q1, *r1, *q2, *r2;
 522   tarval *bits_minus_1, *two_bits_1, *seven_ff;
 523
 524   struct mu magu;
 525
 526   tarval_int_overflow_mode_t rem = tarval_get_integer_overflow_mode();
 527
 528   /* we need overflow mode to work correctly */
 529   tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
 530
 531   bits_minus_1 = new_tarval_from_long(bits - 1, mode);
 532   two_bits_1   = SHL(get_mode_one(mode), bits_minus_1);
 533   seven_ff     = SUB(two_bits_1, ONE(mode));
 534
 535   magu.need_add = 0;                            /* initialize the add indicator */
 536   nc = SUB(NEG(ONE(mode)), MOD(NEG(d), d));
 537   p  = bits - 1;                                /* Init: p */
 538   q1 = DIV(two_bits_1, nc);                     /* Init: q1 = 2^p/nc */
 539   r1 = SUB(two_bits_1, MUL(q1, nc));            /* Init: r1 = rem(2^p, nc) */
 540   q2 = DIV(seven_ff, d);                        /* Init: q2 = (2^p - 1)/d */
 541   r2 = SUB(seven_ff, MUL(q2, d));               /* Init: r2 = rem(2^p - 1, d) */
 542
 543   do {
 544     ++p;
 545     if (CMP(r1, SUB(nc, r1)) & pn_Cmp_Ge) {
 546       q1 = ADD(ADD(q1, q1), ONE(mode));
 547       r1 = SUB(ADD(r1, r1), nc);
 548     }
 549     else {
 550       q1 = ADD(q1, q1);
 551       r1 = ADD(r1, r1);
 552     }
 553
 554     if (CMP(ADD(r2, ONE(mode)), SUB(d, r2)) & pn_Cmp_Ge) {
 555       if (CMP(q2, seven_ff) & pn_Cmp_Ge)
 556         magu.need_add = 1;
 557
 558       q2 = ADD(ADD(q2, q2), ONE(mode));
 559       r2 = SUB(ADD(ADD(r2, r2), ONE(mode)), d);
 560     }
 561     else {
 562       if (CMP(q2, two_bits_1) & pn_Cmp_Ge)
 563         magu.need_add = 1;
 564
 565       q2 = ADD(q2, q2);
 566       r2 = ADD(ADD(r2, r2), ONE(mode));
 567     }
 568     delta = SUB(SUB(d, ONE(mode)), r2);
 569   } while (p < 2*bits &&
 570           (CMP(q1, delta) & pn_Cmp_Lt || (CMP(q1, delta) & pn_Cmp_Eq && CMP(r1, ZERO(mode)) & pn_Cmp_Eq)));
 571
 572   magu.M = ADD(q2, ONE(mode));                       /* Magic number */
 573   magu.s = p - bits;                                 /* and shift amount */
 574
 575   tarval_set_integer_overflow_mode(rem);
 576
 577   return magu;
 578 }
 579
 580 /**
 581  * Build the Mulh replacement code for n / tv.
 582  *
 583  * Note that 'div' might be a mod or DivMod operation as well
 584  */
 585 static ir_node *replace_div_by_mulh(ir_node *div, tarval *tv)
 586 {
 587   dbg_info *dbg  = get_irn_dbg_info(div);
 588   ir_node *n     = get_binop_left(div);
 589   ir_node *block = get_irn_n(div, -1);
 590   ir_mode *mode  = get_irn_mode(n);
 591   int bits       = get_mode_size_bits(mode);
 592   ir_node *q, *t, *c;
 593
 594   /* Beware: do not transform bad code */
 595   if (is_Bad(n) || is_Bad(block))
 596     return div;
 597
 598   if (mode_is_signed(mode)) {
 599     struct ms mag = magic(tv);
 600
 601     /* generate the Mulh instruction */
 602     c = new_r_Const(current_ir_graph, block, mode, mag.M);
 603     q = new_rd_Mulh(dbg, current_ir_graph, block, n, c, mode);
 604
 605     /* do we need an Add or Sub */
 606     if (mag.need_add)
 607       q = new_rd_Add(dbg, current_ir_graph, block, q, n, mode);
 608     else if (mag.need_sub)
 609       q = new_rd_Sub(dbg, current_ir_graph, block, q, n, mode);
 610
 611     /* Do we need the shift */
 612     if (mag.s > 0) {
 613       c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s);
 614       q    = new_rd_Shrs(dbg, current_ir_graph, block, q, c, mode);
 615     }
 616
 617     /* final */
 618     c = new_r_Const_long(current_ir_graph, block, mode_Iu, bits-1);
 619     t = new_rd_Shr(dbg, current_ir_graph, block, q, c, mode);
 620
 621     q = new_rd_Add(dbg, current_ir_graph, block, q, t, mode);
 622   }
 623   else {
 624     struct mu mag = magicu(tv);
 625     ir_node *c;
 626
 627     /* generate the Mulh instruction */
 628     c = new_r_Const(current_ir_graph, block, mode, mag.M);
 629     q    = new_rd_Mulh(dbg, current_ir_graph, block, n, c, mode);
 630
 631     if (mag.need_add) {
 632       if (mag.s > 0) {
 633         /* use the GM scheme */
 634         t = new_rd_Sub(dbg, current_ir_graph, block, n, q, mode);
 635
 636         c = new_r_Const(current_ir_graph, block, mode_Iu, get_mode_one(mode_Iu));
 637         t = new_rd_Shr(dbg, current_ir_graph, block, t, c, mode);
 638
 639         t = new_rd_Add(dbg, current_ir_graph, block, t, q, mode);
 640
 641         c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s-1);
 642         q = new_rd_Shr(dbg, current_ir_graph, block, t, c, mode);
 643       }
 644       else {
 645         /* use the default scheme */
 646         q = new_rd_Add(dbg, current_ir_graph, block, q, n, mode);
 647       }
 648     }
 649     else if (mag.s > 0) { /* default scheme, shift needed */
 650       c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s);
 651       q = new_rd_Shr(dbg, current_ir_graph, block, q, c, mode);
 652     }
 653   }
 654   return q;
 655 }
 656
 657 /* Replace Divs with Shifts and Add/Subs and Mulh. */
 658 ir_node *arch_dep_replace_div_by_const(ir_node *irn)
 659 {
 660   ir_node *res  = irn;
 661
 662   /* If the architecture dependent optimizations were not initialized
 663      or this optimization was not enabled. */
 664   if (params == NULL || (opts & arch_dep_div_by_const) == 0)
 665     return irn;
 666
 667   if (get_irn_opcode(irn) == iro_Div) {
 668     ir_node *c = get_Div_right(irn);
 669     ir_node *block, *left;
 670     ir_mode *mode;
 671     tarval *tv, *ntv;
 672     dbg_info *dbg;
 673     int n, bits;
 674     int k, n_flag;
 675
 676     if (get_irn_op(c) != op_Const)
 677       return irn;
 678
 679     tv = get_Const_tarval(c);
 680
 681     /* check for division by zero */
 682     if (classify_tarval(tv) == TV_CLASSIFY_NULL)
 683       return irn;
 684
 685     left  = get_Div_left(irn);
 686     mode  = get_irn_mode(left);
 687     block = get_irn_n(irn, -1);
 688     dbg   = get_irn_dbg_info(irn);
 689
 690     bits = get_mode_size_bits(mode);
 691     n    = (bits + 7) / 8;
 692
 693     k = -1;
 694     if (mode_is_signed(mode)) {
 695       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 696       ntv = tarval_neg(tv);
 697       n_flag = 1;
 698       k = tv_ld2(ntv, n);
 699     }
 700
 701     if (k < 0) {
 702       n_flag = 0;
 703       k = tv_ld2(tv, n);
 704     }
 705
 706     if (k >= 0) { /* division by 2^k or -2^k */
 707       if (mode_is_signed(mode)) {
 708         ir_node *k_node;
 709         ir_node *curr = left;
 710
 711         if (k != 1) {
 712           k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
 713           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 714         }
 715
 716         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
 717         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 718
 719         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 720
 721         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 722         res    = new_rd_Shrs(dbg, current_ir_graph, block, curr, k_node, mode);
 723
 724         if (n_flag) { /* negate the result */
 725           ir_node *k_node;
 726
 727           k_node = new_r_Const(current_ir_graph, block, mode, get_mode_null(mode));
 728           res = new_rd_Sub(dbg, current_ir_graph, block, k_node, res, mode);
 729         }
 730       }
 731       else {      /* unsigned case */
 732         ir_node *k_node;
 733
 734         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 735         res    = new_rd_Shr(dbg, current_ir_graph, block, left, k_node, mode);
 736       }
 737     }
 738     else {
 739       /* other constant */
 740       if (allow_Mulh(mode))
 741         res = replace_div_by_mulh(irn, tv);
 742     }
 743   }
 744
 745   if (res != irn)
 746     hook_arch_dep_replace_division_by_const(irn);
 747
 748   return res;
 749 }
 750
 751 /* Replace Mods with Shifts and Add/Subs and Mulh. */
 752 ir_node *arch_dep_replace_mod_by_const(ir_node *irn)
 753 {
 754   ir_node *res  = irn;
 755
 756   /* If the architecture dependent optimizations were not initialized
 757      or this optimization was not enabled. */
 758   if (params == NULL || (opts & arch_dep_mod_by_const) == 0)
 759     return irn;
 760
 761   if (get_irn_opcode(irn) == iro_Mod) {
 762     ir_node *c = get_Mod_right(irn);
 763     ir_node *block, *left;
 764     ir_mode *mode;
 765     tarval *tv, *ntv;
 766     dbg_info *dbg;
 767     int n, bits;
 768     int k;
 769
 770     if (get_irn_op(c) != op_Const)
 771       return irn;
 772
 773     tv = get_Const_tarval(c);
 774
 775     /* check for division by zero */
 776     if (classify_tarval(tv) == TV_CLASSIFY_NULL)
 777       return irn;
 778
 779     left  = get_Mod_left(irn);
 780     mode  = get_irn_mode(left);
 781     block = get_irn_n(irn, -1);
 782     dbg   = get_irn_dbg_info(irn);
 783     bits = get_mode_size_bits(mode);
 784     n    = (bits + 7) / 8;
 785
 786     k = -1;
 787     if (mode_is_signed(mode)) {
 788       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 789       ntv = tarval_neg(tv);
 790       k = tv_ld2(ntv, n);
 791     }
 792
 793     if (k < 0) {
 794       k = tv_ld2(tv, n);
 795     }
 796
 797     if (k >= 0) {
 798       /* division by 2^k or -2^k:
 799        * we use "modulus" here, so x % y == x % -y that's why is no difference between the case 2^k and -2^k
 800        */
 801       if (mode_is_signed(mode)) {
 802         ir_node *k_node;
 803         ir_node *curr = left;
 804
 805         if (k != 1) {
 806           k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
 807           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 808         }
 809
 810         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
 811         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 812
 813         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 814
 815         k_node = new_r_Const_long(current_ir_graph, block, mode, (-1) << k);
 816         curr   = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
 817
 818         res    = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
 819       }
 820       else {      /* unsigned case */
 821         ir_node *k_node;
 822
 823         k_node = new_r_Const_long(current_ir_graph, block, mode, (1 << k) - 1);
 824         res    = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
 825       }
 826     }
 827     else {
 828       /* other constant */
 829       if (allow_Mulh(mode)) {
 830         res = replace_div_by_mulh(irn, tv);
 831
 832         res = new_rd_Mul(dbg, current_ir_graph, block, res, c, mode);
 833
 834         /* res = arch_dep_mul_to_shift(res); */
 835
 836         res = new_rd_Sub(dbg, current_ir_graph, block, left, res, mode);
 837       }
 838     }
 839   }
 840
 841   if (res != irn)
 842     hook_arch_dep_replace_division_by_const(irn);
 843
 844   return res;
 845 }
 846
 847 /* Replace DivMods with Shifts and Add/Subs and Mulh. */
 848 void arch_dep_replace_divmod_by_const(ir_node **div, ir_node **mod, ir_node *irn)
 849 {
 850   *div = *mod = NULL;
 851
 852   /* If the architecture dependent optimizations were not initialized
 853      or this optimization was not enabled. */
 854   if (params == NULL ||
 855       ((opts & (arch_dep_div_by_const|arch_dep_mod_by_const)) != (arch_dep_div_by_const|arch_dep_mod_by_const)))
 856     return;
 857
 858   if (get_irn_opcode(irn) == iro_DivMod) {
 859     ir_node *c = get_DivMod_right(irn);
 860     ir_node *block, *left;
 861     ir_mode *mode;
 862     tarval *tv, *ntv;
 863     dbg_info *dbg;
 864     int n, bits;
 865     int k, n_flag;
 866
 867     if (get_irn_op(c) != op_Const)
 868       return;
 869
 870     tv = get_Const_tarval(c);
 871
 872     /* check for division by zero */
 873     if (classify_tarval(tv) == TV_CLASSIFY_NULL)
 874       return;
 875
 876     left  = get_DivMod_left(irn);
 877     mode  = get_irn_mode(left);
 878     block = get_irn_n(irn, -1);
 879     dbg   = get_irn_dbg_info(irn);
 880
 881     bits = get_mode_size_bits(mode);
 882     n    = (bits + 7) / 8;
 883
 884     k = -1;
 885     if (mode_is_signed(mode)) {
 886       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 887       ntv = tarval_neg(tv);
 888       n_flag = 1;
 889       k = tv_ld2(ntv, n);
 890     }
 891
 892     if (k < 0) {
 893       n_flag = 0;
 894       k = tv_ld2(tv, n);
 895     }
 896
 897     if (k >= 0) { /* division by 2^k or -2^k */
 898       if (mode_is_signed(mode)) {
 899         ir_node *k_node, *c_k;
 900         ir_node *curr = left;
 901
 902         if (k != 1) {
 903           k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
 904           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 905         }
 906
 907         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
 908         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 909
 910         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 911
 912         c_k    = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 913
 914         *div   = new_rd_Shrs(dbg, current_ir_graph, block, curr, c_k, mode);
 915
 916         if (n_flag) { /* negate the div result */
 917           ir_node *k_node;
 918
 919           k_node = new_r_Const(current_ir_graph, block, mode, get_mode_null(mode));
 920           *div = new_rd_Sub(dbg, current_ir_graph, block, k_node, *div, mode);
 921         }
 922
 923         k_node = new_r_Const_long(current_ir_graph, block, mode, (-1) << k);
 924         curr   = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
 925
 926         *mod   = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
 927       }
 928       else {      /* unsigned case */
 929         ir_node *k_node;
 930
 931         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 932         *div   = new_rd_Shr(dbg, current_ir_graph, block, left, k_node, mode);
 933
 934         k_node = new_r_Const_long(current_ir_graph, block, mode, (1 << k) - 1);
 935         *mod   = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
 936       }
 937     }
 938     else {
 939       /* other constant */
 940       if (allow_Mulh(mode)) {
 941         ir_node *t;
 942
 943         *div = replace_div_by_mulh(irn, tv);
 944
 945         t    = new_rd_Mul(dbg, current_ir_graph, block, *div, c, mode);
 946
 947         /* t = arch_dep_mul_to_shift(t); */
 948
 949         *mod = new_rd_Sub(dbg, current_ir_graph, block, left, t, mode);
 950       }
 951     }
 952   }
 953
 954   if (*div)
 955     hook_arch_dep_replace_division_by_const(irn);
 956 }
 957
 958
 959 static const arch_dep_params_t default_params = {
 960   1,  /* also use subs */
 961   4,  /* maximum shifts */
 962   31, /* maximum shift amount */
 963
 964   0,  /* allow Mulhs */
 965   0,  /* allow Mulus */
 966   32  /* Mulh allowed up to 32 bit */
 967 };
 968
 969 /* A default parameter factory for testing purposes. */
 970 const arch_dep_params_t *arch_dep_default_factory(void) {
 971   return &default_params;
 972 }