nsz Git - libfirm/blob - ir/ir/irarch.c

   1 /**
   2  * @file irarch.c
   3  * @date 28.9.2004
   4  * @author Sebastian Hack, Michael Beck
   5  * @brief Machine dependent Firm optimizations.
   6  *
   7  * $Id$
   8  */
   9 #ifdef HAVE_CONFIG_H
  10 # include "config.h"
  11 #endif
  12
  13 #ifdef HAVE_STDLIB_H
  14 # include <stdlib.h>
  15 #endif
  16
  17 #include <assert.h>
  18
  19 #include "irnode_t.h"
  20 #include "irgraph_t.h"
  21 #include "irmode_t.h"
  22 #include "iropt_t.h"
  23 #include "ircons_t.h"
  24 #include "irgmod.h"
  25 #include "irvrfy.h"
  26 #include "tv_t.h"
  27 #include "dbginfo_t.h"
  28 #include "iropt_dbg.h"
  29 #include "irflag_t.h"
  30 #include "irhooks.h"
  31 #include "ircons.h"
  32 #include "irarch.h"
  33 #include "irreflect.h"
  34
  35 #undef DEB
  36
  37 #define MAX_BITSTR 64
  38
  39 /* when we need verifying */
  40 #ifdef NDEBUG
  41 # define IRN_VRFY_IRG(res, irg)
  42 #else
  43 # define IRN_VRFY_IRG(res, irg)  irn_vrfy_irg(res, irg)
  44 #endif
  45
  46 /** The params got from the factory in arch_dep_init(...). */
  47 static const arch_dep_params_t *params = NULL;
  48
  49 /** The bit mask, which optimizations to apply. */
  50 static arch_dep_opts_t opts;
  51
  52 /* we need this new pseudo op */
  53 static ir_op *op_Mulh = NULL;
  54
  55 /**
  56  * construct a Mulh: Mulh(a,b) = (a * b) >> w, w is the with in bits of a, b
  57  */
  58 static ir_node *
  59 new_rd_Mulh (dbg_info *db, ir_graph *irg, ir_node *block,
  60        ir_node *op1, ir_node *op2, ir_mode *mode)
  61 {
  62   ir_node *in[2];
  63   ir_node *res;
  64
  65   in[0] = op1;
  66   in[1] = op2;
  67   res = new_ir_node(db, irg, block, op_Mulh, mode, 2, in);
  68   res = optimize_node(res);
  69   IRN_VRFY_IRG(res, irg);
  70   return res;
  71 }
  72
  73 ir_op *get_op_Mulh(void)  { return op_Mulh; }
  74
  75 void arch_dep_init(arch_dep_params_factory_t factory)
  76 {
  77   opts = arch_dep_none;
  78
  79   if (factory != NULL)
  80     params = factory();
  81
  82   if (! op_Mulh) {
  83     rflct_sig_t *sig;
  84     int mulh_opc = get_next_ir_opcode();
  85
  86     /* create the Mulh operation */
  87     op_Mulh = new_ir_op(mulh_opc, "Mulh",  op_pin_state_floats, irop_flag_commutative, oparity_binary, 0, 0, NULL);
  88     sig = rflct_signature_allocate(1, 3);
  89     rflct_signature_set_arg(sig, 0, 0, "Res", RFLCT_MC(Int), 0, 0);
  90     rflct_signature_set_arg(sig, 1, 0, "Block", RFLCT_MC(BB), 0, 0);
  91     rflct_signature_set_arg(sig, 1, 1, "Op 0", RFLCT_MC(Int), 0, 0);
  92     rflct_signature_set_arg(sig, 1, 2, "Op 1", RFLCT_MC(Int), 0, 0);
  93
  94     rflct_new_opcode(mulh_opc, "Mulh", 0);
  95     rflct_opcode_add_signature(mulh_opc, sig);
  96   }
  97 }
  98
  99 void arch_dep_set_opts(arch_dep_opts_t the_opts) {
 100   opts = the_opts;
 101 }
 102
 103 /** check, whether a mode allows a Mulh instruction. */
 104 static int allow_Mulh(ir_mode *mode)
 105 {
 106   if (get_mode_size_bits(mode) > params->max_bits_for_mulh)
 107     return 0;
 108   return (mode_is_signed(mode) && params->allow_mulhs) || (!mode_is_signed(mode) && params->allow_mulhu);
 109 }
 110
 111 /* Replace Muls with Shifts and Add/Subs. */
 112 ir_node *arch_dep_replace_mul_with_shifts(ir_node *irn)
 113 {
 114   ir_node *res = irn;
 115   ir_mode *mode = get_irn_mode(irn);
 116
 117   /* If the architecture dependent optimizations were not initialized
 118      or this optimization was not enabled. */
 119   if (params == NULL || (opts & arch_dep_mul_to_shift) == 0)
 120     return irn;
 121
 122   if (get_irn_op(irn) == op_Mul && mode_is_int(mode)) {
 123     ir_node *block   = get_irn_n(irn, -1);
 124     ir_node *left    = get_binop_left(irn);
 125     ir_node *right   = get_binop_right(irn);
 126     tarval *tv       = NULL;
 127     ir_node *operand = NULL;
 128
 129     /* Look, if one operand is a constant. */
 130     if (get_irn_opcode(left) == iro_Const) {
 131       tv = get_Const_tarval(left);
 132       operand = right;
 133     } else if(get_irn_opcode(right) == iro_Const) {
 134       tv = get_Const_tarval(right);
 135       operand = left;
 136     }
 137
 138     if (tv != NULL) {
 139       int maximum_shifts = params->maximum_shifts;
 140       int also_use_subs = params->also_use_subs;
 141       int highest_shift_amount = params->highest_shift_amount;
 142
 143       char *bitstr = get_tarval_bitpattern(tv);
 144       char *p;
 145       int i, last = 0;
 146       int counter = 0;
 147       int curr_bit;
 148       int compr_len = 0;
 149       char compr[MAX_BITSTR];
 150
 151       int singleton;
 152       int end_of_group;
 153       int shift_with_sub[MAX_BITSTR] = { 0 };
 154       int shift_without_sub[MAX_BITSTR] = { 0 };
 155       int shift_with_sub_pos = 0;
 156       int shift_without_sub_pos = 0;
 157
 158 #if DEB
 159       {
 160         long val = get_tarval_long(tv);
 161         fprintf(stderr, "Found mul with %ld(%lx) = ", val, val);
 162         for(p = bitstr; *p != '\0'; p++)
 163           printf("%c", *p);
 164         printf("\n");
 165       }
 166 #endif
 167
 168       for(p = bitstr; *p != '\0'; p++) {
 169         int bit = *p != '0';
 170
 171         if (bit != last) {
 172           /* The last was 1 we are now at 0 OR
 173            * The last was 0 and we are now at 1 */
 174           compr[compr_len++] = counter;
 175           counter = 1;
 176         }
 177         else
 178           counter++;
 179
 180         last = bit;
 181       }
 182       compr[compr_len++] = counter;
 183
 184
 185 #ifdef DEB
 186       {
 187         const char *prefix = "";
 188         for(i = 0; i < compr_len; i++, prefix = ",")
 189           fprintf(stderr, "%s%d", prefix, compr[i]);
 190         fprintf("\n");
 191       }
 192 #endif
 193
 194       /* Go over all recorded one groups. */
 195       curr_bit = compr[0];
 196
 197       for(i = 1; i < compr_len; i = end_of_group + 2) {
 198         int j, zeros_in_group, ones_in_group;
 199
 200         ones_in_group = compr[i];
 201         zeros_in_group = 0;
 202
 203         /* Scan for singular 0s in a sequence. */
 204         for(j = i + 1; j < compr_len && compr[j] == 1; j += 2) {
 205           zeros_in_group += 1;
 206           ones_in_group += (j + 1 < compr_len ? compr[j + 1] : 0);
 207         }
 208         end_of_group = j - 1;
 209
 210         if(zeros_in_group >= ones_in_group - 1)
 211           end_of_group = i;
 212
 213 #ifdef DEB
 214         fprintf(stderr, "  i:%d, eg:%d\n", i, end_of_group);
 215 #endif
 216
 217         singleton = compr[i] == 1 && i == end_of_group;
 218         for(j = i; j <= end_of_group; j += 2) {
 219           int curr_ones = compr[j];
 220           int biased_curr_bit = curr_bit + 1;
 221           int k;
 222
 223 #ifdef DEB
 224           fprintf(stderr, "    j:%d, ones:%d\n", j, curr_ones);
 225 #endif
 226
 227           /* If this ones group is a singleton group (it has no
 228              singleton zeros inside. */
 229           if(singleton)
 230             shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
 231           else if(j == i)
 232             shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
 233
 234           for(k = 0; k < curr_ones; k++)
 235             shift_without_sub[shift_without_sub_pos++] = biased_curr_bit + k;
 236
 237           curr_bit += curr_ones;
 238           biased_curr_bit = curr_bit + 1;
 239
 240           if(!singleton && j == end_of_group)
 241             shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
 242           else if(j != end_of_group)
 243             shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
 244
 245           curr_bit += compr[j + 1];
 246         }
 247
 248       }
 249
 250       {
 251         int *shifts = shift_with_sub;
 252         int n = shift_with_sub_pos;
 253         int highest_shift_wide = 0;
 254         int highest_shift_seq = 0;
 255         int last_shift = 0;
 256
 257         /* If we may not use subs, or we can achive the same with adds,
 258            prefer adds. */
 259         if(!also_use_subs || shift_with_sub_pos >= shift_without_sub_pos) {
 260           shifts = shift_without_sub;
 261           n = shift_without_sub_pos;
 262         }
 263
 264         /* If the number of needed shifts exceeds the given maximum,
 265            use the Mul and exit. */
 266         if(n > maximum_shifts) {
 267 #ifdef DEB
 268           fprintf(stderr, "Only allowed %d shifts, but %d are needed\n",
 269               maximum_shifts, n);
 270 #endif
 271           return irn;
 272         }
 273
 274         /* Compute the highest shift needed for both, the
 275            sequential and wide representations. */
 276         for(i = 0; i < n; i++) {
 277           int curr = abs(shifts[i]);
 278           int curr_seq = curr - last;
 279
 280           highest_shift_wide = curr > highest_shift_wide ? curr
 281             : highest_shift_wide;
 282           highest_shift_seq = curr_seq > highest_shift_seq ? curr_seq
 283             : highest_shift_seq;
 284
 285           last_shift = curr;
 286         }
 287
 288         /* If the highest shift amount is greater than the given limit,
 289            give back the Mul */
 290         if(highest_shift_seq > highest_shift_amount) {
 291 #ifdef DEB
 292           fprintf(stderr, "Shift argument %d exceeds maximum %d\n",
 293               highest_shift_seq, highest_shift_amount);
 294 #endif
 295           return irn;
 296         }
 297
 298         /* If we have subs, we cannot do sequential. */
 299         if(1 /* also_use_subs */) {
 300           if(n > 0) {
 301             ir_node *curr = NULL;
 302
 303             i = n - 1;
 304
 305             do {
 306               int curr_shift = shifts[i];
 307               int sub = curr_shift < 0;
 308               int amount = abs(curr_shift) - 1;
 309               ir_node *aux = operand;
 310
 311               assert(amount >= 0 && "What is a negative shift??");
 312
 313               if (amount != 0) {
 314                 ir_node *cnst = new_r_Const_long(current_ir_graph, block, mode_Iu, amount);
 315                 aux = new_r_Shl(current_ir_graph, block, operand, cnst, mode);
 316               }
 317
 318               if (curr) {
 319                 if (sub)
 320                   curr = new_r_Sub(current_ir_graph, block, curr, aux, mode);
 321                 else
 322                   curr = new_r_Add(current_ir_graph, block, curr, aux, mode);
 323               } else
 324                 curr = aux;
 325
 326             } while(--i >= 0);
 327
 328             res = curr;
 329           }
 330         }
 331
 332 #ifdef DEB
 333         {
 334           const char *prefix = "";
 335           for (i = 0; i < n; ++i) {
 336             fprintf(stderr, "%s%d", prefix, shifts[i]);
 337             prefix = ", ";
 338           }
 339           fprintf(stderr, "\n");
 340         }
 341 #endif
 342
 343       }
 344
 345       if(bitstr)
 346         free(bitstr);
 347     }
 348
 349   }
 350
 351   if (res != irn)
 352     hook_arch_dep_replace_mul_with_shifts(irn);
 353
 354   return res;
 355 }
 356
 357 /**
 358  * calculated the ld2 of a tarval if tarval is 2^n, else returns -1.
 359  */
 360 static int tv_ld2(tarval *tv, int bits)
 361 {
 362   int i, k = 0, num;
 363
 364   for (num = i = 0; i < bits; ++i) {
 365     unsigned char v = get_tarval_sub_bits(tv, i);
 366
 367     if (v) {
 368       int j;
 369
 370       for (j = 0; j < 8; ++j)
 371         if ((1 << j) & v) {
 372           ++num;
 373           k = 8 * i + j;
 374         }
 375     }
 376   }
 377   if (num == 1)
 378     return k;
 379   return -1;
 380 }
 381
 382
 383 /* for shorter lines */
 384 #define ABS(a)    tarval_abs(a)
 385 #define NEG(a)    tarval_neg(a)
 386 #define NOT(a)    tarval_not(a)
 387 #define SHL(a, b) tarval_shl(a, b)
 388 #define SHR(a, b) tarval_shr(a, b)
 389 #define ADD(a, b) tarval_add(a, b)
 390 #define SUB(a, b) tarval_sub(a, b)
 391 #define MUL(a, b) tarval_mul(a, b)
 392 #define DIV(a, b) tarval_div(a, b)
 393 #define MOD(a, b) tarval_mod(a, b)
 394 #define CMP(a, b) tarval_cmp(a, b)
 395 #define CNV(a, m) tarval_convert_to(a, m)
 396 #define ONE(m)    get_mode_one(m)
 397 #define ZERO(m)   get_mode_null(m)
 398
 399 /** The result of a the magic() function. */
 400 struct ms {
 401   tarval *M;        /**< magic number */
 402   int s;            /**< shift amount */
 403   int need_add;     /**< an additional add is needed */
 404   int need_sub;     /**< an additional sub is needed */
 405 };
 406
 407 /**
 408  * Signed division by constant d: calculate the Magic multiplier M and the shift amount s
 409  *
 410  * see Hacker's Delight: 10-6 Integer Division by Constants: Incorporation into a Compiler
 411  */
 412 static struct ms magic(tarval *d)
 413 {
 414   ir_mode *mode   = get_tarval_mode(d);
 415   ir_mode *u_mode = find_unsigned_mode(mode);
 416   int bits        = get_mode_size_bits(u_mode);
 417   int p;
 418   tarval *ad, *anc, *delta, *q1, *r1, *q2, *r2, *t;     /* unsigned */
 419   pn_Cmp d_cmp, M_cmp;
 420
 421   tarval *bits_minus_1, *two_bits_1;
 422
 423   struct ms mag;
 424
 425   tarval_int_overflow_mode_t rem = tarval_get_integer_overflow_mode();
 426
 427   /* we need overflow mode to work correctly */
 428   tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
 429
 430   /* 2^(bits-1) */
 431   bits_minus_1 = new_tarval_from_long(bits - 1, u_mode);
 432   two_bits_1   = SHL(get_mode_one(u_mode), bits_minus_1);
 433
 434   ad  = CNV(ABS(d), u_mode);
 435   t   = ADD(two_bits_1, SHR(CNV(d, u_mode), bits_minus_1));
 436   anc = SUB(SUB(t, ONE(u_mode)), MOD(t, ad));   /* Absolute value of nc */
 437   p   = bits - 1;                               /* Init: p */
 438   q1  = DIV(two_bits_1, anc);                   /* Init: q1 = 2^p/|nc| */
 439   r1  = SUB(two_bits_1, MUL(q1, anc));          /* Init: r1 = rem(2^p, |nc|) */
 440   q2  = DIV(two_bits_1, ad);                    /* Init: q2 = 2^p/|d| */
 441   r2  = SUB(two_bits_1, MUL(q2, ad));           /* Init: r2 = rem(2^p, |d|) */
 442
 443   do {
 444     ++p;
 445     q1 = ADD(q1, q1);                           /* Update q1 = 2^p/|nc| */
 446     r1 = ADD(r1, r1);                           /* Update r1 = rem(2^p, |nc|) */
 447
 448     if (CMP(r1, anc) & pn_Cmp_Ge) {
 449       q1 = ADD(q1, ONE(u_mode));
 450       r1 = SUB(r1, anc);
 451     }
 452
 453     q2 = ADD(q2, q2);                           /* Update q2 = 2^p/|d| */
 454     r2 = ADD(r2, r2);                           /* Update r2 = rem(2^p, |d|) */
 455
 456     if (CMP(r2, ad) & pn_Cmp_Ge) {
 457       q2 = ADD(q2, ONE(u_mode));
 458       r2 = SUB(r2, ad);
 459     }
 460
 461     delta = SUB(ad, r2);
 462   } while (CMP(q1, delta) & pn_Cmp_Lt || (CMP(q1, delta) & pn_Cmp_Eq && CMP(r1, ZERO(u_mode)) & pn_Cmp_Eq));
 463
 464   d_cmp = CMP(d, ZERO(mode));
 465
 466   if (d_cmp & pn_Cmp_Ge)
 467     mag.M = ADD(CNV(q2, mode), ONE(mode));
 468   else
 469     mag.M = SUB(ZERO(mode), ADD(CNV(q2, mode), ONE(mode)));
 470
 471   M_cmp = CMP(mag.M, ZERO(mode));
 472
 473   mag.s = p - bits;
 474
 475   /* need an add if d > 0 && M < 0 */
 476   mag.need_add = d_cmp & pn_Cmp_Gt && M_cmp & pn_Cmp_Lt;
 477
 478   /* need a sub if d < 0 && M > 0 */
 479   mag.need_sub = d_cmp & pn_Cmp_Lt && M_cmp & pn_Cmp_Gt;
 480
 481   tarval_set_integer_overflow_mode(rem);
 482
 483   return mag;
 484 }
 485
 486 /** The result of the magicu() function. */
 487 struct mu {
 488   tarval *M;        /**< magic add constant */
 489   int s;            /**< shift amount */
 490   int need_add;     /**< add indicator */
 491 };
 492
 493 /**
 494  * Unsigned division by constant d: calculate the Magic multiplier M and the shift amount s
 495  *
 496  * see Hacker's Delight: 10-10 Integer Division by Constants: Incorporation into a Compiler (Unsigned)
 497  */
 498 static struct mu magicu(tarval *d)
 499 {
 500   ir_mode *mode   = get_tarval_mode(d);
 501   int bits        = get_mode_size_bits(mode);
 502   int p;
 503   tarval *nc, *delta, *q1, *r1, *q2, *r2;
 504   tarval *bits_minus_1, *two_bits_1, *seven_ff;
 505
 506   struct mu magu;
 507
 508   tarval_int_overflow_mode_t rem = tarval_get_integer_overflow_mode();
 509
 510   /* we need overflow mode to work correctly */
 511   tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
 512
 513   bits_minus_1 = new_tarval_from_long(bits - 1, mode);
 514   two_bits_1   = SHL(get_mode_one(mode), bits_minus_1);
 515   seven_ff     = SUB(two_bits_1, ONE(mode));
 516
 517   magu.need_add = 0;                            /* initialize the add indicator */
 518   nc = SUB(NEG(ONE(mode)), MOD(NEG(d), d));
 519   p  = bits - 1;                                /* Init: p */
 520   q1 = DIV(two_bits_1, nc);                     /* Init: q1 = 2^p/nc */
 521   r1 = SUB(two_bits_1, MUL(q1, nc));            /* Init: r1 = rem(2^p, nc) */
 522   q2 = DIV(seven_ff, d);                        /* Init: q2 = (2^p - 1)/d */
 523   r2 = SUB(seven_ff, MUL(q2, d));               /* Init: r2 = rem(2^p - 1, d) */
 524
 525   do {
 526     ++p;
 527     if (CMP(r1, SUB(nc, r1)) & pn_Cmp_Ge) {
 528       q1 = ADD(ADD(q1, q1), ONE(mode));
 529       r1 = SUB(ADD(r1, r1), nc);
 530     }
 531     else {
 532       q1 = ADD(q1, q1);
 533       r1 = ADD(r1, r1);
 534     }
 535
 536     if (CMP(ADD(r2, ONE(mode)), SUB(d, r2)) & pn_Cmp_Ge) {
 537       if (CMP(q2, seven_ff) & pn_Cmp_Ge)
 538         magu.need_add = 1;
 539
 540       q2 = ADD(ADD(q2, q2), ONE(mode));
 541       r2 = SUB(ADD(ADD(r2, r2), ONE(mode)), d);
 542     }
 543     else {
 544       if (CMP(q2, two_bits_1) & pn_Cmp_Ge)
 545         magu.need_add = 1;
 546
 547       q2 = ADD(q2, q2);
 548       r2 = ADD(ADD(r2, r2), ONE(mode));
 549     }
 550     delta = SUB(SUB(d, ONE(mode)), r2);
 551   } while (p < 2*bits &&
 552           (CMP(q1, delta) & pn_Cmp_Lt || (CMP(q1, delta) & pn_Cmp_Eq && CMP(r1, ZERO(mode)) & pn_Cmp_Eq)));
 553
 554   magu.M = ADD(q2, ONE(mode));                       /* Magic number */
 555   magu.s = p - bits;                                 /* and shift amount */
 556
 557   tarval_set_integer_overflow_mode(rem);
 558
 559   return magu;
 560 }
 561
 562 /**
 563  * Build the Mulh replacement code for n / tv.
 564  *
 565  * Note that 'div' might be a mod or DivMod operation as well
 566  */
 567 static ir_node *replace_div_by_mulh(ir_node *div, tarval *tv)
 568 {
 569   dbg_info *dbg  = get_irn_dbg_info(div);
 570   ir_node *n     = get_binop_left(div);
 571   ir_node *block = get_irn_n(div, -1);
 572   ir_mode *mode  = get_irn_mode(n);
 573   int bits       = get_mode_size_bits(mode);
 574   ir_node *q, *t, *c;
 575
 576   /* Beware: do not transform bad code */
 577   if (is_Bad(n) || is_Bad(block))
 578     return div;
 579
 580   if (mode_is_signed(mode)) {
 581     struct ms mag = magic(tv);
 582
 583     /* generate the Mulh instruction */
 584     c = new_r_Const(current_ir_graph, block, mode, mag.M);
 585     q = new_rd_Mulh(dbg, current_ir_graph, block, n, c, mode);
 586
 587     /* do we need an Add or Sub */
 588     if (mag.need_add)
 589       q = new_rd_Add(dbg, current_ir_graph, block, q, n, mode);
 590     else if (mag.need_sub)
 591       q = new_rd_Sub(dbg, current_ir_graph, block, q, n, mode);
 592
 593     /* Do we need the shift */
 594     if (mag.s > 0) {
 595       c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s);
 596       q    = new_rd_Shrs(dbg, current_ir_graph, block, q, c, mode);
 597     }
 598
 599     /* final */
 600     c = new_r_Const_long(current_ir_graph, block, mode_Iu, bits-1);
 601     t = new_rd_Shr(dbg, current_ir_graph, block, q, c, mode);
 602
 603     q = new_rd_Add(dbg, current_ir_graph, block, q, t, mode);
 604   }
 605   else {
 606     struct mu mag = magicu(tv);
 607     ir_node *c;
 608
 609     /* generate the Mulh instruction */
 610     c = new_r_Const(current_ir_graph, block, mode, mag.M);
 611     q    = new_rd_Mulh(dbg, current_ir_graph, block, n, c, mode);
 612
 613     if (mag.need_add) {
 614       if (mag.s > 0) {
 615         /* use the GM scheme */
 616         t = new_rd_Sub(dbg, current_ir_graph, block, n, q, mode);
 617
 618         c = new_r_Const(current_ir_graph, block, mode_Iu, get_mode_one(mode_Iu));
 619         t = new_rd_Shr(dbg, current_ir_graph, block, t, c, mode);
 620
 621         t = new_rd_Add(dbg, current_ir_graph, block, t, q, mode);
 622
 623         c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s-1);
 624         q = new_rd_Shr(dbg, current_ir_graph, block, t, c, mode);
 625       }
 626       else {
 627         /* use the default scheme */
 628         q = new_rd_Add(dbg, current_ir_graph, block, q, n, mode);
 629       }
 630     }
 631     else if (mag.s > 0) { /* default scheme, shift needed */
 632       c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s);
 633       q = new_rd_Shr(dbg, current_ir_graph, block, q, c, mode);
 634     }
 635   }
 636   return q;
 637 }
 638
 639 /* Replace Divs with Shifts and Add/Subs and Mulh. */
 640 ir_node *arch_dep_replace_div_by_const(ir_node *irn)
 641 {
 642   ir_node *res  = irn;
 643
 644   /* If the architecture dependent optimizations were not initialized
 645      or this optimization was not enabled. */
 646   if (params == NULL || (opts & arch_dep_div_by_const) == 0)
 647     return irn;
 648
 649   if (get_irn_opcode(irn) == iro_Div) {
 650     ir_node *c = get_Div_right(irn);
 651     ir_node *block, *left;
 652     ir_mode *mode;
 653     tarval *tv, *ntv;
 654     dbg_info *dbg;
 655     int n, bits;
 656     int k, n_flag;
 657
 658     if (get_irn_op(c) != op_Const)
 659       return irn;
 660
 661     tv = get_Const_tarval(c);
 662
 663     /* check for division by zero */
 664     if (classify_tarval(tv) == TV_CLASSIFY_NULL)
 665       return irn;
 666
 667     left  = get_Div_left(irn);
 668     mode  = get_irn_mode(left);
 669     block = get_irn_n(irn, -1);
 670     dbg   = get_irn_dbg_info(irn);
 671
 672     bits = get_mode_size_bits(mode);
 673     n    = (bits + 7) / 8;
 674
 675     k = -1;
 676     if (mode_is_signed(mode)) {
 677       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 678       ntv = tarval_neg(tv);
 679       n_flag = 1;
 680       k = tv_ld2(ntv, n);
 681     }
 682
 683     if (k < 0) {
 684       n_flag = 0;
 685       k = tv_ld2(tv, n);
 686     }
 687
 688     if (k >= 0) { /* division by 2^k or -2^k */
 689       if (mode_is_signed(mode)) {
 690         ir_node *k_node;
 691         ir_node *curr = left;
 692
 693         if (k != 1) {
 694           k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
 695           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 696         }
 697
 698         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
 699         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 700
 701         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 702
 703         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 704         res    = new_rd_Shrs(dbg, current_ir_graph, block, curr, k_node, mode);
 705
 706         if (n_flag) { /* negate the result */
 707           ir_node *k_node;
 708
 709           k_node = new_r_Const(current_ir_graph, block, mode, get_mode_null(mode));
 710           res = new_rd_Sub(dbg, current_ir_graph, block, k_node, res, mode);
 711         }
 712       }
 713       else {      /* unsigned case */
 714         ir_node *k_node;
 715
 716         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 717         res    = new_rd_Shr(dbg, current_ir_graph, block, left, k_node, mode);
 718       }
 719     }
 720     else {
 721       /* other constant */
 722       if (allow_Mulh(mode))
 723         res = replace_div_by_mulh(irn, tv);
 724     }
 725   }
 726
 727   if (res != irn)
 728     hook_arch_dep_replace_division_by_const(irn);
 729
 730   return res;
 731 }
 732
 733 /* Replace Mods with Shifts and Add/Subs and Mulh. */
 734 ir_node *arch_dep_replace_mod_by_const(ir_node *irn)
 735 {
 736   ir_node *res  = irn;
 737
 738   /* If the architecture dependent optimizations were not initialized
 739      or this optimization was not enabled. */
 740   if (params == NULL || (opts & arch_dep_mod_by_const) == 0)
 741     return irn;
 742
 743   if (get_irn_opcode(irn) == iro_Mod) {
 744     ir_node *c = get_Mod_right(irn);
 745     ir_node *block, *left;
 746     ir_mode *mode;
 747     tarval *tv, *ntv;
 748     dbg_info *dbg;
 749     int n, bits;
 750     int k;
 751
 752     if (get_irn_op(c) != op_Const)
 753       return irn;
 754
 755     tv = get_Const_tarval(c);
 756
 757     /* check for division by zero */
 758     if (classify_tarval(tv) == TV_CLASSIFY_NULL)
 759       return irn;
 760
 761     left  = get_Mod_left(irn);
 762     mode  = get_irn_mode(left);
 763     block = get_irn_n(irn, -1);
 764     dbg   = get_irn_dbg_info(irn);
 765     bits = get_mode_size_bits(mode);
 766     n    = (bits + 7) / 8;
 767
 768     k = -1;
 769     if (mode_is_signed(mode)) {
 770       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 771       ntv = tarval_neg(tv);
 772       k = tv_ld2(ntv, n);
 773     }
 774
 775     if (k < 0) {
 776       k = tv_ld2(tv, n);
 777     }
 778
 779     if (k >= 0) {
 780       /* division by 2^k or -2^k:
 781        * we use "modulus" here, so x % y == x % -y that's why is no difference between the case 2^k and -2^k
 782        */
 783       if (mode_is_signed(mode)) {
 784         ir_node *k_node;
 785         ir_node *curr = left;
 786
 787         if (k != 1) {
 788           k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
 789           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 790         }
 791
 792         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
 793         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 794
 795         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 796
 797         k_node = new_r_Const_long(current_ir_graph, block, mode, (-1) << k);
 798         curr   = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
 799
 800         res    = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
 801       }
 802       else {      /* unsigned case */
 803         ir_node *k_node;
 804
 805         k_node = new_r_Const_long(current_ir_graph, block, mode, (1 << k) - 1);
 806         res    = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
 807       }
 808     }
 809     else {
 810       /* other constant */
 811       if (allow_Mulh(mode)) {
 812         res = replace_div_by_mulh(irn, tv);
 813
 814         res = new_rd_Mul(dbg, current_ir_graph, block, res, c, mode);
 815
 816         /* res = arch_dep_mul_to_shift(res); */
 817
 818         res = new_rd_Sub(dbg, current_ir_graph, block, left, res, mode);
 819       }
 820     }
 821   }
 822
 823   if (res != irn)
 824     hook_arch_dep_replace_division_by_const(irn);
 825
 826   return res;
 827 }
 828
 829 /* Replace DivMods with Shifts and Add/Subs and Mulh. */
 830 void arch_dep_replace_divmod_by_const(ir_node **div, ir_node **mod, ir_node *irn)
 831 {
 832   *div = *mod = NULL;
 833
 834   /* If the architecture dependent optimizations were not initialized
 835      or this optimization was not enabled. */
 836   if (params == NULL ||
 837       ((opts & (arch_dep_div_by_const|arch_dep_mod_by_const)) != (arch_dep_div_by_const|arch_dep_mod_by_const)))
 838     return;
 839
 840   if (get_irn_opcode(irn) == iro_DivMod) {
 841     ir_node *c = get_DivMod_right(irn);
 842     ir_node *block, *left;
 843     ir_mode *mode;
 844     tarval *tv, *ntv;
 845     dbg_info *dbg;
 846     int n, bits;
 847     int k, n_flag;
 848
 849     if (get_irn_op(c) != op_Const)
 850       return;
 851
 852     tv = get_Const_tarval(c);
 853
 854     /* check for division by zero */
 855     if (classify_tarval(tv) == TV_CLASSIFY_NULL)
 856       return;
 857
 858     left  = get_DivMod_left(irn);
 859     mode  = get_irn_mode(left);
 860     block = get_irn_n(irn, -1);
 861     dbg   = get_irn_dbg_info(irn);
 862
 863     bits = get_mode_size_bits(mode);
 864     n    = (bits + 7) / 8;
 865
 866     k = -1;
 867     if (mode_is_signed(mode)) {
 868       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 869       ntv = tarval_neg(tv);
 870       n_flag = 1;
 871       k = tv_ld2(ntv, n);
 872     }
 873
 874     if (k < 0) {
 875       n_flag = 0;
 876       k = tv_ld2(tv, n);
 877     }
 878
 879     if (k >= 0) { /* division by 2^k or -2^k */
 880       if (mode_is_signed(mode)) {
 881         ir_node *k_node, *c_k;
 882         ir_node *curr = left;
 883
 884         if (k != 1) {
 885           k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
 886           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 887         }
 888
 889         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
 890         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 891
 892         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 893
 894         c_k    = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 895
 896         *div   = new_rd_Shrs(dbg, current_ir_graph, block, curr, c_k, mode);
 897
 898         if (n_flag) { /* negate the div result */
 899           ir_node *k_node;
 900
 901           k_node = new_r_Const(current_ir_graph, block, mode, get_mode_null(mode));
 902           *div = new_rd_Sub(dbg, current_ir_graph, block, k_node, *div, mode);
 903         }
 904
 905         k_node = new_r_Const_long(current_ir_graph, block, mode, (-1) << k);
 906         curr   = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
 907
 908         *mod   = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
 909       }
 910       else {      /* unsigned case */
 911         ir_node *k_node;
 912
 913         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 914         *div   = new_rd_Shr(dbg, current_ir_graph, block, left, k_node, mode);
 915
 916         k_node = new_r_Const_long(current_ir_graph, block, mode, (1 << k) - 1);
 917         *mod   = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
 918       }
 919     }
 920     else {
 921       /* other constant */
 922       if (allow_Mulh(mode)) {
 923         ir_node *t;
 924
 925         *div = replace_div_by_mulh(irn, tv);
 926
 927         t    = new_rd_Mul(dbg, current_ir_graph, block, *div, c, mode);
 928
 929         /* t = arch_dep_mul_to_shift(t); */
 930
 931         *mod = new_rd_Sub(dbg, current_ir_graph, block, left, t, mode);
 932       }
 933     }
 934   }
 935
 936   if (*div)
 937     hook_arch_dep_replace_division_by_const(irn);
 938 }
 939
 940
 941 static const arch_dep_params_t default_params = {
 942   1,  /* also use subs */
 943   4,  /* maximum shifts */
 944   31, /* maximum shift amount */
 945
 946   0,  /* allow Mulhs */
 947   0,  /* allow Mulus */
 948   32  /* Mulh allowed up to 32 bit */
 949 };
 950
 951 /* A default parameter factory for testing purposes. */
 952 const arch_dep_params_t *arch_dep_default_factory(void) {
 953   return &default_params;
 954 }