nsz Git - libfirm/blob - ir/ir/irarch.c

   1 /**
   2  * @file irarch.c
   3  * @date 28.9.2004
   4  * @author Sebastian Hack
   5  * @brief Machine dependent firm optimizations.
   6  *
   7  * $Id$
   8  */
   9 #include <stdlib.h>
  10 #include <assert.h>
  11
  12 #include "irnode_t.h"
  13 #include "irgraph_t.h"
  14 #include "irmode_t.h"
  15 #include "iropt_t.h"
  16 #include "ircons_t.h"
  17 #include "irgmod.h"
  18 #include "irvrfy.h"
  19 #include "tv_t.h"
  20 #include "dbginfo_t.h"
  21 #include "iropt_dbg.h"
  22 #include "irflag_t.h"
  23 #include "irhooks.h"
  24 #include "ircons.h"
  25 #include "irarch.h"
  26 #include "irreflect.h"
  27
  28 #undef DEB
  29
  30 #define MAX_BITSTR 64
  31
  32 /* when we need verifying */
  33 #ifdef NDEBUG
  34 # define IRN_VRFY_IRG(res, irg)
  35 #else
  36 # define IRN_VRFY_IRG(res, irg)  irn_vrfy_irg(res, irg)
  37 #endif
  38
  39 /** The params got from the factory in arch_dep_init(...). */
  40 static const arch_dep_params_t *params = NULL;
  41
  42 /** The bit mask, which optimizations to apply. */
  43 static arch_dep_opts_t opts;
  44
  45 /* we need this new pseudo op */
  46 static ir_op *op_Mulh = NULL;
  47
  48 /**
  49  * construct a Mulh: Mulh(a,b) = (a * b) >> w, w is the with in bits of a, b
  50  */
  51 static ir_node *
  52 new_rd_Mulh (dbg_info *db, ir_graph *irg, ir_node *block,
  53        ir_node *op1, ir_node *op2, ir_mode *mode)
  54 {
  55   ir_node *in[2];
  56   ir_node *res;
  57
  58   in[0] = op1;
  59   in[1] = op2;
  60   res = new_ir_node(db, irg, block, op_Mulh, mode, 2, in);
  61   res = optimize_node(res);
  62   IRN_VRFY_IRG(res, irg);
  63   return res;
  64 }
  65
  66 ir_op *get_op_Mulh(void)  { return op_Mulh; }
  67
  68 void arch_dep_init(arch_dep_params_factory_t factory)
  69 {
  70   opts = arch_dep_none;
  71
  72   if (factory != NULL)
  73     params = factory();
  74
  75   if (! op_Mulh) {
  76     rflct_sig_t *sig;
  77     int mulh_opc = get_next_ir_opcode();
  78
  79     /* create the Mulh operation */
  80     op_Mulh = new_ir_op(mulh_opc, "Mulh",  op_pin_state_floats, irop_flag_commutative, oparity_binary, 0, 0, NULL);
  81     sig = rflct_signature_allocate(1, 3);
  82     rflct_signature_set_arg(sig, 0, 0, "Res", RFLCT_MC(Int), 0, 0);
  83     rflct_signature_set_arg(sig, 1, 0, "Block", RFLCT_MC(BB), 0, 0);
  84     rflct_signature_set_arg(sig, 1, 1, "Op 0", RFLCT_MC(Int), 0, 0);
  85     rflct_signature_set_arg(sig, 1, 2, "Op 1", RFLCT_MC(Int), 0, 0);
  86
  87     rflct_new_opcode(mulh_opc, "Mulh", false);
  88     rflct_opcode_add_signature(mulh_opc, sig);
  89   }
  90 }
  91
  92 void arch_dep_set_opts(arch_dep_opts_t the_opts) {
  93   opts = the_opts;
  94 }
  95
  96 /* check, whether a mode allows a Mulh instruction */
  97 static int allow_Mulh(ir_mode *mode)
  98 {
  99   if (get_mode_size_bits(mode) > params->max_bits_for_mulh)
 100     return 0;
 101   return (mode_is_signed(mode) && params->allow_mulhs) || (!mode_is_signed(mode) && params->allow_mulhu);
 102 }
 103
 104 ir_node *arch_dep_replace_mul_with_shifts(ir_node *irn)
 105 {
 106   ir_node *res = irn;
 107   ir_mode *mode = get_irn_mode(irn);
 108
 109   /* If the architecture dependent optimizations were not initialized
 110      or this optimization was not enabled. */
 111   if (params == NULL || (opts & arch_dep_mul_to_shift) == 0)
 112     return irn;
 113
 114   if (get_irn_op(irn) == op_Mul && mode_is_int(mode)) {
 115     ir_node *block   = get_irn_n(irn, -1);
 116     ir_node *left    = get_binop_left(irn);
 117     ir_node *right   = get_binop_right(irn);
 118     tarval *tv       = NULL;
 119     ir_node *operand = NULL;
 120
 121     /* Look, if one operand is a constant. */
 122     if (get_irn_opcode(left) == iro_Const) {
 123       tv = get_Const_tarval(left);
 124       operand = right;
 125     } else if(get_irn_opcode(right) == iro_Const) {
 126       tv = get_Const_tarval(right);
 127       operand = left;
 128     }
 129
 130     if (tv != NULL) {
 131       int maximum_shifts = params->maximum_shifts;
 132       int also_use_subs = params->also_use_subs;
 133       int highest_shift_amount = params->highest_shift_amount;
 134
 135       char *bitstr = get_tarval_bitpattern(tv);
 136       char *p;
 137       int i, last = 0;
 138       int counter = 0;
 139       int curr_bit;
 140       int compr_len = 0;
 141       char compr[MAX_BITSTR];
 142
 143       int singleton;
 144       int end_of_group;
 145       int shift_with_sub[MAX_BITSTR] = { 0 };
 146       int shift_without_sub[MAX_BITSTR] = { 0 };
 147       int shift_with_sub_pos = 0;
 148       int shift_without_sub_pos = 0;
 149
 150 #if DEB
 151       {
 152         long val = get_tarval_long(tv);
 153         fprintf(stderr, "Found mul with %ld(%lx) = ", val, val);
 154         for(p = bitstr; *p != '\0'; p++)
 155           printf("%c", *p);
 156         printf("\n");
 157       }
 158 #endif
 159
 160       for(p = bitstr; *p != '\0'; p++) {
 161         int bit = *p != '0';
 162
 163         if (bit != last) {
 164           /* The last was 1 we are now at 0 OR
 165            * The last was 0 and we are now at 1 */
 166           compr[compr_len++] = counter;
 167           counter = 1;
 168         }
 169         else
 170           counter++;
 171
 172         last = bit;
 173       }
 174       compr[compr_len++] = counter;
 175
 176
 177 #ifdef DEB
 178       {
 179         const char *prefix = "";
 180         for(i = 0; i < compr_len; i++, prefix = ",")
 181           fprintf(stderr, "%s%d", prefix, compr[i]);
 182         fprintf("\n");
 183       }
 184 #endif
 185
 186       // Go over all recorded one groups.
 187       curr_bit = compr[0];
 188
 189       for(i = 1; i < compr_len; i = end_of_group + 2) {
 190         int j, zeros_in_group, ones_in_group;
 191
 192         ones_in_group = compr[i];
 193         zeros_in_group = 0;
 194
 195         // Scan for singular 0s in a sequence
 196         for(j = i + 1; j < compr_len && compr[j] == 1; j += 2) {
 197           zeros_in_group += 1;
 198           ones_in_group += (j + 1 < compr_len ? compr[j + 1] : 0);
 199         }
 200         end_of_group = j - 1;
 201
 202         if(zeros_in_group >= ones_in_group - 1)
 203           end_of_group = i;
 204
 205 #ifdef DEB
 206         fprintf(stderr, "  i:%d, eg:%d\n", i, end_of_group);
 207 #endif
 208
 209         singleton = compr[i] == 1 && i == end_of_group;
 210         for(j = i; j <= end_of_group; j += 2) {
 211           int curr_ones = compr[j];
 212           int biased_curr_bit = curr_bit + 1;
 213           int k;
 214
 215 #ifdef DEB
 216           fprintf(stderr, "    j:%d, ones:%d\n", j, curr_ones);
 217 #endif
 218
 219           // If this ones group is a singleton group (it has no
 220           // singleton zeros inside
 221           if(singleton)
 222             shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
 223           else if(j == i)
 224             shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
 225
 226           for(k = 0; k < curr_ones; k++)
 227             shift_without_sub[shift_without_sub_pos++] = biased_curr_bit + k;
 228
 229           curr_bit += curr_ones;
 230           biased_curr_bit = curr_bit + 1;
 231
 232           if(!singleton && j == end_of_group)
 233             shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
 234           else if(j != end_of_group)
 235             shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
 236
 237           curr_bit += compr[j + 1];
 238         }
 239
 240       }
 241
 242       {
 243         int *shifts = shift_with_sub;
 244         int n = shift_with_sub_pos;
 245         int highest_shift_wide = 0;
 246         int highest_shift_seq = 0;
 247         int last_shift = 0;
 248
 249         /* If we may not use subs, or we can achive the same with adds,
 250            prefer adds. */
 251         if(!also_use_subs || shift_with_sub_pos >= shift_without_sub_pos) {
 252           shifts = shift_without_sub;
 253           n = shift_without_sub_pos;
 254         }
 255
 256         /* If the number of needed shifts exceeds the given maximum,
 257            use the Mul and exit. */
 258         if(n > maximum_shifts) {
 259 #ifdef DEB
 260           fprintf(stderr, "Only allowed %d shifts, but %d are needed\n",
 261               maximum_shifts, n);
 262 #endif
 263           return irn;
 264         }
 265
 266         /* Compute the highest shift needed for both, the
 267            sequential and wide representations. */
 268         for(i = 0; i < n; i++) {
 269           int curr = abs(shifts[i]);
 270           int curr_seq = curr - last;
 271
 272           highest_shift_wide = curr > highest_shift_wide ? curr
 273             : highest_shift_wide;
 274           highest_shift_seq = curr_seq > highest_shift_seq ? curr_seq
 275             : highest_shift_seq;
 276
 277           last_shift = curr;
 278         }
 279
 280         /* If the highest shift amount is greater than the given limit,
 281            give back the Mul */
 282         if(highest_shift_seq > highest_shift_amount) {
 283 #ifdef DEB
 284           fprintf(stderr, "Shift argument %d exceeds maximum %d\n",
 285               highest_shift_seq, highest_shift_amount);
 286 #endif
 287           return irn;
 288         }
 289
 290         /* If we have subs, we cannot do sequential. */
 291         if(1 /* also_use_subs */) {
 292           if(n > 0) {
 293             ir_node *curr = NULL;
 294
 295             i = n - 1;
 296
 297             do {
 298               int curr_shift = shifts[i];
 299               int sub = curr_shift < 0;
 300               int amount = abs(curr_shift) - 1;
 301               ir_node *aux = operand;
 302
 303               assert(amount >= 0 && "What is a negative shift??");
 304
 305               if (amount != 0) {
 306                 ir_node *cnst = new_r_Const_long(current_ir_graph, block, mode_Iu, amount);
 307                 aux = new_r_Shl(current_ir_graph, block, operand, cnst, mode);
 308               }
 309
 310               if (curr) {
 311                 if (sub)
 312                   curr = new_r_Sub(current_ir_graph, block, curr, aux, mode);
 313                 else
 314                   curr = new_r_Add(current_ir_graph, block, curr, aux, mode);
 315               } else
 316                 curr = aux;
 317
 318             } while(--i >= 0);
 319
 320             res = curr;
 321           }
 322         }
 323
 324 #ifdef DEB
 325         {
 326           const char *prefix = "";
 327           for (i = 0; i < n; ++i) {
 328             fprintf(stderr, "%s%d", prefix, shifts[i]);
 329             prefix = ", ";
 330           }
 331           fprintf(stderr, "\n");
 332         }
 333 #endif
 334
 335       }
 336
 337       if(bitstr)
 338         free(bitstr);
 339     }
 340
 341   }
 342
 343   if (res != irn)
 344     hook_arch_dep_replace_mul_with_shifts(irn);
 345
 346   return res;
 347 }
 348
 349 /**
 350  * calculated the ld2 of a tarval if tarval is 2^n, else returns -1.
 351  */
 352 static int tv_ld2(tarval *tv, int bits)
 353 {
 354   int i, k, num;
 355
 356   for (num = i = 0; i < bits; ++i) {
 357     unsigned char v = get_tarval_sub_bits(tv, i);
 358
 359     if (v) {
 360       int j;
 361
 362       for (j = 0; j < 8; ++j)
 363         if ((1 << j) & v) {
 364           ++num;
 365           k = 8 * i + j;
 366         }
 367     }
 368   }
 369   if (num == 1)
 370     return k;
 371   return -1;
 372 }
 373
 374
 375 /* for shorter lines */
 376 #define ABS(a)    tarval_abs(a)
 377 #define NEG(a)    tarval_neg(a)
 378 #define NOT(a)    tarval_not(a)
 379 #define SHL(a, b) tarval_shl(a, b)
 380 #define SHR(a, b) tarval_shr(a, b)
 381 #define ADD(a, b) tarval_add(a, b)
 382 #define SUB(a, b) tarval_sub(a, b)
 383 #define MUL(a, b) tarval_mul(a, b)
 384 #define DIV(a, b) tarval_div(a, b)
 385 #define MOD(a, b) tarval_mod(a, b)
 386 #define CMP(a, b) tarval_cmp(a, b)
 387 #define CNV(a, m) tarval_convert_to(a, m)
 388 #define ONE(m)    get_mode_one(m)
 389 #define ZERO(m)   get_mode_null(m)
 390
 391 struct ms {
 392   tarval *M;        /**< magic number */
 393   int s;            /**< shift amount */
 394   int need_add;     /**< an additional add is needed */
 395   int need_sub;     /**< an additional sub is needed */
 396 };
 397
 398 /**
 399  * Signed division by constant d: calculate the Magic multiplier M and the shift amount s
 400  *
 401  * see Hacker's Delight: 10-6 Integer Division by Constants: Incorporation into a Compiler
 402  */
 403 static struct ms magic(tarval *d)
 404 {
 405   ir_mode *mode   = get_tarval_mode(d);
 406   ir_mode *u_mode = find_unsigned_mode(mode);
 407   int bits        = get_mode_size_bits(u_mode);
 408   int p;
 409   tarval *ad, *anc, *delta, *q1, *r1, *q2, *r2, *t;     /* unsigned */
 410   pn_Cmp d_cmp, M_cmp;
 411
 412   tarval *bits_minus_1, *two_bits_1;
 413
 414   struct ms mag;
 415
 416   tarval_int_overflow_mode_t rem = tarval_get_integer_overflow_mode();
 417
 418   /* we need overflow mode to work correctly */
 419   tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
 420
 421   /* 2^(bits-1) */
 422   bits_minus_1 = new_tarval_from_long(bits - 1, u_mode);
 423   two_bits_1   = SHL(get_mode_one(u_mode), bits_minus_1);
 424
 425   ad  = CNV(ABS(d), u_mode);
 426   t   = ADD(two_bits_1, SHR(CNV(d, u_mode), bits_minus_1));
 427   anc = SUB(SUB(t, ONE(u_mode)), MOD(t, ad));   /* Absolute value of nc */
 428   p   = bits - 1;                               /* Init: p */
 429   q1  = DIV(two_bits_1, anc);                   /* Init: q1 = 2^p/|nc| */
 430   r1  = SUB(two_bits_1, MUL(q1, anc));          /* Init: r1 = rem(2^p, |nc|) */
 431   q2  = DIV(two_bits_1, ad);                    /* Init: q2 = 2^p/|d| */
 432   r2  = SUB(two_bits_1, MUL(q2, ad));           /* Init: r2 = rem(2^p, |d|) */
 433
 434   do {
 435     ++p;
 436     q1 = ADD(q1, q1);                           /* Update q1 = 2^p/|nc| */
 437     r1 = ADD(r1, r1);                           /* Update r1 = rem(2^p, |nc|) */
 438
 439     if (CMP(r1, anc) & pn_Cmp_Ge) {
 440       q1 = ADD(q1, ONE(u_mode));
 441       r1 = SUB(r1, anc);
 442     }
 443
 444     q2 = ADD(q2, q2);                           /* Update q2 = 2^p/|d| */
 445     r2 = ADD(r2, r2);                           /* Update r2 = rem(2^p, |d|) */
 446
 447     if (CMP(r2, ad) & pn_Cmp_Ge) {
 448       q2 = ADD(q2, ONE(u_mode));
 449       r2 = SUB(r2, ad);
 450     }
 451
 452     delta = SUB(ad, r2);
 453   } while (CMP(q1, delta) & pn_Cmp_Lt || (CMP(q1, delta) & pn_Cmp_Eq && CMP(r1, ZERO(u_mode)) & pn_Cmp_Eq));
 454
 455   d_cmp = CMP(d, ZERO(mode));
 456
 457   if (d_cmp & pn_Cmp_Ge)
 458     mag.M = ADD(CNV(q2, mode), ONE(mode));
 459   else
 460     mag.M = SUB(ZERO(mode), ADD(CNV(q2, mode), ONE(mode)));
 461
 462   M_cmp = CMP(mag.M, ZERO(mode));
 463
 464   mag.s = p - bits;
 465
 466   /* need an add if d > 0 && M < 0 */
 467   mag.need_add = d_cmp & pn_Cmp_Gt && M_cmp & pn_Cmp_Lt;
 468
 469   /* need a sub if d < 0 && M > 0 */
 470   mag.need_sub = d_cmp & pn_Cmp_Lt && M_cmp & pn_Cmp_Gt;
 471
 472   tarval_set_integer_overflow_mode(rem);
 473
 474   return mag;
 475 }
 476
 477 struct mu {
 478   tarval *M;        /**< magic add constant */
 479   int s;            /**< shift amount */
 480   int need_add;     /**< add indicator */
 481 };
 482
 483 /**
 484  * Unsigned division by constant d: calculate the Magic multiplier M and the shift amount s
 485  *
 486  * see Hacker's Delight: 10-10 Integer Division by Constants: Incorporation into a Compiler (Unsigned)
 487  */
 488 static struct mu magicu(tarval *d)
 489 {
 490   ir_mode *mode   = get_tarval_mode(d);
 491   int bits        = get_mode_size_bits(mode);
 492   int p;
 493   tarval *nc, *delta, *q1, *r1, *q2, *r2;
 494   tarval *bits_minus_1, *two_bits_1, *seven_ff;
 495
 496   struct mu magu;
 497
 498   tarval_int_overflow_mode_t rem = tarval_get_integer_overflow_mode();
 499
 500   /* we need overflow mode to work correctly */
 501   tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
 502
 503   bits_minus_1 = new_tarval_from_long(bits - 1, mode);
 504   two_bits_1   = SHL(get_mode_one(mode), bits_minus_1);
 505   seven_ff     = SUB(two_bits_1, ONE(mode));
 506
 507   magu.need_add = 0;                            /* initialize the add indicator */
 508   nc = SUB(NEG(ONE(mode)), MOD(NEG(d), d));
 509   p  = bits - 1;                                /* Init: p */
 510   q1 = DIV(two_bits_1, nc);                     /* Init: q1 = 2^p/nc */
 511   r1 = SUB(two_bits_1, MUL(q1, nc));            /* Init: r1 = rem(2^p, nc) */
 512   q2 = DIV(seven_ff, d);                        /* Init: q2 = (2^p - 1)/d */
 513   r2 = SUB(seven_ff, MUL(q2, d));               /* Init: r2 = rem(2^p - 1, d) */
 514
 515   do {
 516     ++p;
 517     if (CMP(r1, SUB(nc, r1)) & pn_Cmp_Ge) {
 518       q1 = ADD(ADD(q1, q1), ONE(mode));
 519       r1 = SUB(ADD(r1, r1), nc);
 520     }
 521     else {
 522       q1 = ADD(q1, q1);
 523       r1 = ADD(r1, r1);
 524     }
 525
 526     if (CMP(ADD(r2, ONE(mode)), SUB(d, r2)) & pn_Cmp_Ge) {
 527       if (CMP(q2, seven_ff) & pn_Cmp_Ge)
 528         magu.need_add = 1;
 529
 530       q2 = ADD(ADD(q2, q2), ONE(mode));
 531       r2 = SUB(ADD(ADD(r2, r2), ONE(mode)), d);
 532     }
 533     else {
 534       if (CMP(q2, two_bits_1) & pn_Cmp_Ge)
 535         magu.need_add = 1;
 536
 537       q2 = ADD(q2, q2);
 538       r2 = ADD(ADD(r2, r2), ONE(mode));
 539     }
 540     delta = SUB(SUB(d, ONE(mode)), r2);
 541   } while (p < 2*bits &&
 542           (CMP(q1, delta) & pn_Cmp_Lt || (CMP(q1, delta) & pn_Cmp_Eq && CMP(r1, ZERO(mode)) & pn_Cmp_Eq)));
 543
 544   magu.M = ADD(q2, ONE(mode));                       /* Magic number */
 545   magu.s = p - bits;                                 /* and shift amount */
 546
 547   tarval_set_integer_overflow_mode(rem);
 548
 549   return magu;
 550 }
 551
 552 /**
 553  * build the Mulh replacement code for n / tv
 554  *
 555  * Note that 'div' might be a mod or DivMod operation as well
 556  */
 557 static ir_node *replace_div_by_mulh(ir_node *div, tarval *tv)
 558 {
 559   dbg_info *dbg  = get_irn_dbg_info(div);
 560   ir_node *n     = get_binop_left(div);
 561   ir_node *block = get_irn_n(div, -1);
 562   ir_mode *mode  = get_irn_mode(n);
 563   int bits       = get_mode_size_bits(mode);
 564   ir_node *q, *t, *c;
 565
 566   /* Beware: do not transform bad code */
 567   if (is_Bad(n) || is_Bad(block))
 568     return div;
 569
 570   if (mode_is_signed(mode)) {
 571     struct ms mag = magic(tv);
 572
 573     /* generate the Mulh instruction */
 574     c = new_r_Const(current_ir_graph, block, mode, mag.M);
 575     q = new_rd_Mulh(dbg, current_ir_graph, block, n, c, mode);
 576
 577     /* do we need an Add or Sub */
 578     if (mag.need_add)
 579       q = new_rd_Add(dbg, current_ir_graph, block, q, n, mode);
 580     else if (mag.need_sub)
 581       q = new_rd_Sub(dbg, current_ir_graph, block, q, n, mode);
 582
 583     /* Do we need the shift */
 584     if (mag.s > 0) {
 585       c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s);
 586       q    = new_rd_Shrs(dbg, current_ir_graph, block, q, c, mode);
 587     }
 588
 589     /* final */
 590     c = new_r_Const_long(current_ir_graph, block, mode_Iu, bits-1);
 591     t = new_rd_Shr(dbg, current_ir_graph, block, q, c, mode);
 592
 593     q = new_rd_Add(dbg, current_ir_graph, block, q, t, mode);
 594   }
 595   else {
 596     struct mu mag = magicu(tv);
 597     ir_node *c;
 598
 599     /* generate the Mulh instruction */
 600     c = new_r_Const(current_ir_graph, block, mode, mag.M);
 601     q    = new_rd_Mulh(dbg, current_ir_graph, block, n, c, mode);
 602
 603     if (mag.need_add) {
 604       if (mag.s > 0) {
 605         /* use the GM scheme */
 606         t = new_rd_Sub(dbg, current_ir_graph, block, n, q, mode);
 607
 608         c = new_r_Const(current_ir_graph, block, mode_Iu, get_mode_one(mode_Iu));
 609         t = new_rd_Shr(dbg, current_ir_graph, block, t, c, mode);
 610
 611         t = new_rd_Add(dbg, current_ir_graph, block, t, q, mode);
 612
 613         c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s-1);
 614         q = new_rd_Shr(dbg, current_ir_graph, block, t, c, mode);
 615       }
 616       else {
 617         /* use the default scheme */
 618         q = new_rd_Add(dbg, current_ir_graph, block, q, n, mode);
 619       }
 620     }
 621     else if (mag.s > 0) { /* default scheme, shift needed */
 622       c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s);
 623       q = new_rd_Shr(dbg, current_ir_graph, block, q, c, mode);
 624     }
 625   }
 626   return q;
 627 }
 628
 629 ir_node *arch_dep_replace_div_by_const(ir_node *irn)
 630 {
 631   ir_node *res  = irn;
 632
 633   /* If the architecture dependent optimizations were not initialized
 634      or this optimization was not enabled. */
 635   if (params == NULL || (opts & arch_dep_div_by_const) == 0)
 636     return irn;
 637
 638   if (get_irn_opcode(irn) == iro_Div) {
 639     ir_node *c = get_Div_right(irn);
 640     ir_node *block, *left;
 641     ir_mode *mode;
 642     tarval *tv, *ntv;
 643     dbg_info *dbg;
 644     int n, bits;
 645     int k, n_flag;
 646
 647     if (get_irn_op(c) != op_Const)
 648       return irn;
 649
 650     left  = get_Div_left(irn);
 651     mode  = get_irn_mode(left);
 652     block = get_irn_n(irn, -1);
 653     dbg   = get_irn_dbg_info(irn);
 654     tv    = get_Const_tarval(c);
 655
 656     bits = get_mode_size_bits(mode);
 657     n    = (bits + 7) / 8;
 658
 659     k = -1;
 660     if (mode_is_signed(mode)) {
 661       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 662       ntv = tarval_neg(tv);
 663       n_flag = 1;
 664       k = tv_ld2(ntv, n);
 665     }
 666
 667     if (k < 0) {
 668       n_flag = 0;
 669       k = tv_ld2(tv, n);
 670     }
 671
 672     if (k >= 0) { /* division by 2^k or -2^k */
 673       if (mode_is_signed(mode)) {
 674         ir_node *k_node;
 675         ir_node *curr = left;
 676
 677         if (k != 1) {
 678           k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
 679           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 680         }
 681
 682         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
 683         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 684
 685         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 686
 687         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 688         res    = new_rd_Shrs(dbg, current_ir_graph, block, curr, k_node, mode);
 689
 690         if (n_flag) { /* negate the result */
 691           ir_node *k_node;
 692
 693           k_node = new_r_Const(current_ir_graph, block, mode, get_mode_null(mode));
 694           res = new_rd_Sub(dbg, current_ir_graph, block, k_node, res, mode);
 695         }
 696       }
 697       else {      /* unsigned case */
 698         ir_node *k_node;
 699
 700         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 701         res    = new_rd_Shr(dbg, current_ir_graph, block, left, k_node, mode);
 702       }
 703     }
 704     else {
 705       /* other constant */
 706       if (allow_Mulh(mode))
 707         res = replace_div_by_mulh(irn, tv);
 708     }
 709   }
 710
 711   if (res != irn)
 712     hook_arch_dep_replace_div_by_const(irn);
 713
 714   return res;
 715 }
 716
 717 ir_node *arch_dep_replace_mod_by_const(ir_node *irn)
 718 {
 719   ir_node *res  = irn;
 720
 721   /* If the architecture dependent optimizations were not initialized
 722      or this optimization was not enabled. */
 723   if (params == NULL || (opts & arch_dep_mod_by_const) == 0)
 724     return irn;
 725
 726   if (get_irn_opcode(irn) == iro_Mod) {
 727     ir_node *c = get_Mod_right(irn);
 728     ir_node *block, *left;
 729     ir_mode *mode;
 730     tarval *tv, *ntv;
 731     dbg_info *dbg;
 732     int n, bits;
 733     int k;
 734
 735     if (get_irn_op(c) != op_Const)
 736       return irn;
 737
 738     left  = get_Mod_left(irn);
 739     mode  = get_irn_mode(left);
 740     block = get_irn_n(irn, -1);
 741     dbg   = get_irn_dbg_info(irn);
 742     tv    = get_Const_tarval(c);
 743
 744     bits = get_mode_size_bits(mode);
 745     n    = (bits + 7) / 8;
 746
 747     k = -1;
 748     if (mode_is_signed(mode)) {
 749       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 750       ntv = tarval_neg(tv);
 751       k = tv_ld2(ntv, n);
 752     }
 753
 754     if (k < 0) {
 755       k = tv_ld2(tv, n);
 756     }
 757
 758     if (k >= 0) {
 759       /* division by 2^k or -2^k:
 760        * we use "modulus" here, so x % y == x % -y that's why is no difference between the case 2^k and -2^k
 761        */
 762       if (mode_is_signed(mode)) {
 763         ir_node *k_node;
 764         ir_node *curr = left;
 765
 766         if (k != 1) {
 767           k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
 768           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 769         }
 770
 771         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
 772         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 773
 774         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 775
 776         k_node = new_r_Const_long(current_ir_graph, block, mode, (-1) << k);
 777         curr   = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
 778
 779         res    = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
 780       }
 781       else {      /* unsigned case */
 782         ir_node *k_node;
 783
 784         k_node = new_r_Const_long(current_ir_graph, block, mode, (1 << k) - 1);
 785         res    = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
 786       }
 787     }
 788     else {
 789       /* other constant */
 790       if (allow_Mulh(mode)) {
 791         res = replace_div_by_mulh(irn, tv);
 792
 793         res = new_rd_Mul(dbg, current_ir_graph, block, res, c, mode);
 794
 795         /* res = arch_dep_mul_to_shift(res); */
 796
 797         res = new_rd_Sub(dbg, current_ir_graph, block, left, res, mode);
 798       }
 799     }
 800   }
 801
 802   if (res != irn)
 803     hook_arch_dep_replace_mod_by_const(irn);
 804
 805   return res;
 806 }
 807
 808 void arch_dep_replace_divmod_by_const(ir_node **div, ir_node **mod, ir_node *irn)
 809 {
 810   *div = *mod = NULL;
 811
 812   /* If the architecture dependent optimizations were not initialized
 813      or this optimization was not enabled. */
 814   if (params == NULL ||
 815       ((opts & (arch_dep_div_by_const|arch_dep_mod_by_const)) != (arch_dep_div_by_const|arch_dep_mod_by_const)))
 816     return;
 817
 818   if (get_irn_opcode(irn) == iro_DivMod) {
 819     ir_node *c = get_DivMod_right(irn);
 820     ir_node *block, *left;
 821     ir_mode *mode;
 822     tarval *tv, *ntv;
 823     dbg_info *dbg;
 824     int n, bits;
 825     int k, n_flag;
 826
 827     if (get_irn_op(c) != op_Const)
 828       return;
 829
 830     left  = get_DivMod_left(irn);
 831     mode  = get_irn_mode(left);
 832     block = get_irn_n(irn, -1);
 833     dbg   = get_irn_dbg_info(irn);
 834     tv    = get_Const_tarval(c);
 835
 836     bits = get_mode_size_bits(mode);
 837     n    = (bits + 7) / 8;
 838
 839     k = -1;
 840     if (mode_is_signed(mode)) {
 841       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 842       ntv = tarval_neg(tv);
 843       n_flag = 1;
 844       k = tv_ld2(ntv, n);
 845     }
 846
 847     if (k < 0) {
 848       n_flag = 0;
 849       k = tv_ld2(tv, n);
 850     }
 851
 852     if (k >= 0) { /* division by 2^k or -2^k */
 853       if (mode_is_signed(mode)) {
 854         ir_node *k_node, *c_k;
 855         ir_node *curr = left;
 856
 857         if (k != 1) {
 858           k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
 859           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 860         }
 861
 862         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
 863         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 864
 865         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 866
 867         c_k    = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 868
 869         *div   = new_rd_Shrs(dbg, current_ir_graph, block, curr, c_k, mode);
 870
 871         if (n_flag) { /* negate the div result */
 872           ir_node *k_node;
 873
 874           k_node = new_r_Const(current_ir_graph, block, mode, get_mode_null(mode));
 875           *div = new_rd_Sub(dbg, current_ir_graph, block, k_node, *div, mode);
 876         }
 877
 878         k_node = new_r_Const_long(current_ir_graph, block, mode, (-1) << k);
 879         curr   = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
 880
 881         *mod   = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
 882       }
 883       else {      /* unsigned case */
 884         ir_node *k_node;
 885
 886         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 887         *div   = new_rd_Shr(dbg, current_ir_graph, block, left, k_node, mode);
 888
 889         k_node = new_r_Const_long(current_ir_graph, block, mode, (1 << k) - 1);
 890         *mod   = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
 891       }
 892     }
 893     else {
 894       /* other constant */
 895       if (allow_Mulh(mode)) {
 896         ir_node *t;
 897
 898         *div = replace_div_by_mulh(irn, tv);
 899
 900         t    = new_rd_Mul(dbg, current_ir_graph, block, *div, c, mode);
 901
 902         /* t = arch_dep_mul_to_shift(t); */
 903
 904         *mod = new_rd_Sub(dbg, current_ir_graph, block, left, t, mode);
 905       }
 906     }
 907   }
 908
 909   if (*div)
 910     hook_arch_dep_replace_DivMod_by_const(irn);
 911 }
 912
 913
 914 static const arch_dep_params_t default_params = {
 915   1,  /* also use subs */
 916   4,  /* maximum shifts */
 917   31, /* maximum shift amount */
 918
 919   0,  /* allow Mulhs */
 920   0,  /* allow Mulus */
 921   32  /* Mulh allowed up to 32 bit */
 922 };
 923
 924 const arch_dep_params_t *arch_dep_default_factory(void) {
 925   return &default_params;
 926 }