nsz Git - libfirm/blob - ir/ir/irarch.c

   1 /**
   2  * @file irarch.c
   3  * @date 28.9.2004
   4  * @author Sebastian Hack
   5  * @brief Machine dependent firm optimizations.
   6  *
   7  * $Id$
   8  */
   9 #include <stdlib.h>
  10 #include <assert.h>
  11
  12 #include "irnode_t.h"
  13 #include "irgraph_t.h"
  14 #include "irmode_t.h"
  15 #include "iropt_t.h"
  16 #include "ircons_t.h"
  17 #include "irgmod.h"
  18 #include "irvrfy.h"
  19 #include "tv.h"
  20 #include "dbginfo_t.h"
  21 #include "iropt_dbg.h"
  22 #include "irflag_t.h"
  23 #include "firmstat.h"
  24 #include "ircons.h"
  25 #include "irarch.h"
  26 #include "firmstat.h"
  27
  28 #undef DEB
  29
  30 #define MAX_BITSTR 64
  31
  32 /* when we need verifying */
  33 #ifdef NDEBUG
  34 # define IRN_VRFY_IRG(res, irg)
  35 #else
  36 # define IRN_VRFY_IRG(res, irg)  irn_vrfy_irg(res, irg)
  37 #endif
  38
  39 /** The params got from the factory in arch_dep_init(...). */
  40 static const arch_dep_params_t *params = NULL;
  41
  42 /** The bit mask, which optimizations to apply. */
  43 static arch_dep_opts_t opts;
  44
  45 /* we need this new pseudo op */
  46 static ir_op *op_Mulh = NULL;
  47
  48 /**
  49  * construct a Mulh: Mulh(a,b) = (a * b) >> w, w is the with in bits of a, b
  50  */
  51 static ir_node *
  52 new_rd_Mulh (dbg_info *db, ir_graph *irg, ir_node *block,
  53        ir_node *op1, ir_node *op2, ir_mode *mode)
  54 {
  55   ir_node *in[2];
  56   ir_node *res;
  57
  58   if (! op_Mulh) {
  59     op_Mulh = new_ir_op(get_next_ir_opcode(), "Mulh",  op_pin_state_floats, irop_flag_commutative, oparity_binary, 0, 0);
  60   }
  61
  62   in[0] = op1;
  63   in[1] = op2;
  64   res = new_ir_node(db, irg, block, op_Mulh, mode, 2, in);
  65   res = optimize_node(res);
  66   IRN_VRFY_IRG(res, irg);
  67   return res;
  68 }
  69
  70 ir_op *get_op_Mulh(void)  { return op_Mulh; }
  71
  72 void arch_dep_init(arch_dep_params_factory_t factory)
  73 {
  74   opts = arch_dep_none;
  75
  76   if (factory != NULL)
  77     params = factory();
  78
  79   if (params && (opts & (arch_dep_div_by_const|arch_dep_mod_by_const))) {
  80     if (! op_Mulh) {
  81       /* create the Mulh operation */
  82       op_Mulh = new_ir_op(get_next_ir_opcode(), "Mulh",  op_pin_state_floats, irop_flag_commutative, oparity_binary, 0, 0);
  83     }
  84   }
  85 }
  86
  87 void arch_dep_set_opts(arch_dep_opts_t the_opts) {
  88   opts = the_opts;
  89 }
  90
  91 /* check, wheater a mode allows a Mulh instruction */
  92 static int allow_Mulh(ir_mode *mode)
  93 {
  94   if (get_mode_size_bits(mode) > params->max_bits_for_mulh)
  95     return 0;
  96   return (mode_is_signed(mode) && params->allow_mulhs) || (!mode_is_signed(mode) && params->allow_mulhu);
  97 }
  98
  99 ir_node *arch_dep_replace_mul_with_shifts(ir_node *irn)
 100 {
 101   ir_node *res = irn;
 102   ir_mode *mode = get_irn_mode(irn);
 103
 104   /* If the architecture dependent optimizations were not initialized
 105      or this optimization was not enabled. */
 106   if(params == NULL || (opts & arch_dep_mul_to_shift) == 0)
 107     return irn;
 108
 109   if(get_irn_opcode(irn) == iro_Mul && mode_is_int(mode)) {
 110     ir_node *block   = get_nodes_block(irn);
 111     ir_node *left    = get_binop_left(irn);
 112     ir_node *right   = get_binop_right(irn);
 113     tarval *tv       = NULL;
 114     ir_node *operand = NULL;
 115
 116     /* Look, if one operand is a constant. */
 117     if(get_irn_opcode(left) == iro_Const) {
 118       tv = get_Const_tarval(left);
 119       operand = right;
 120     } else if(get_irn_opcode(right) == iro_Const) {
 121       tv = get_Const_tarval(right);
 122       operand = left;
 123     }
 124
 125     if(tv != NULL) {
 126       int maximum_shifts = params->maximum_shifts;
 127       int also_use_subs = params->also_use_subs;
 128       int highest_shift_amount = params->highest_shift_amount;
 129
 130       char *bitstr = get_tarval_bitpattern(tv);
 131       char *p;
 132       int i, last = 0;
 133       int counter = 0;
 134       int curr_bit;
 135       int compr_len = 0;
 136       char compr[MAX_BITSTR];
 137
 138       int singleton;
 139       int end_of_group;
 140       int shift_with_sub[MAX_BITSTR] = { 0 };
 141       int shift_without_sub[MAX_BITSTR] = { 0 };
 142       int shift_with_sub_pos = 0;
 143       int shift_without_sub_pos = 0;
 144
 145 #if DEB
 146       {
 147         long val = get_tarval_long(tv);
 148         fprintf(stderr, "Found mul with %ld(%lx) = ", val, val);
 149         for(p = bitstr; *p != '\0'; p++)
 150           printf("%c", *p);
 151         printf("\n");
 152       }
 153 #endif
 154
 155       for(p = bitstr; *p != '\0'; p++) {
 156         int bit = *p != '0';
 157
 158         if (bit != last) {
 159           /* The last was 1 we are now at 0 OR
 160            * The last was 0 and we are now at 1 */
 161           compr[compr_len++] = counter;
 162           counter = 1;
 163         }
 164         else
 165           counter++;
 166
 167         last = bit;
 168       }
 169       compr[compr_len++] = counter;
 170
 171
 172 #ifdef DEB
 173       {
 174         const char *prefix = "";
 175         for(i = 0; i < compr_len; i++, prefix = ",")
 176           fprintf(stderr, "%s%d", prefix, compr[i]);
 177         fprintf("\n");
 178       }
 179 #endif
 180
 181       // Go over all recorded one groups.
 182       curr_bit = compr[0];
 183
 184       for(i = 1; i < compr_len; i = end_of_group + 2) {
 185         int j, zeros_in_group, ones_in_group;
 186
 187         ones_in_group = compr[i];
 188         zeros_in_group = 0;
 189
 190         // Scan for singular 0s in a sequence
 191         for(j = i + 1; j < compr_len && compr[j] == 1; j += 2) {
 192           zeros_in_group += 1;
 193           ones_in_group += (j + 1 < compr_len ? compr[j + 1] : 0);
 194         }
 195         end_of_group = j - 1;
 196
 197         if(zeros_in_group >= ones_in_group - 1)
 198           end_of_group = i;
 199
 200 #ifdef DEB
 201         fprintf(stderr, "  i:%d, eg:%d\n", i, end_of_group);
 202 #endif
 203
 204         singleton = compr[i] == 1 && i == end_of_group;
 205         for(j = i; j <= end_of_group; j += 2) {
 206           int curr_ones = compr[j];
 207           int biased_curr_bit = curr_bit + 1;
 208           int k;
 209
 210 #ifdef DEB
 211           fprintf(stderr, "    j:%d, ones:%d\n", j, curr_ones);
 212 #endif
 213
 214           // If this ones group is a singleton group (it has no
 215           // singleton zeros inside
 216           if(singleton)
 217             shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
 218           else if(j == i)
 219             shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
 220
 221           for(k = 0; k < curr_ones; k++)
 222             shift_without_sub[shift_without_sub_pos++] = biased_curr_bit + k;
 223
 224           curr_bit += curr_ones;
 225           biased_curr_bit = curr_bit + 1;
 226
 227           if(!singleton && j == end_of_group)
 228             shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
 229           else if(j != end_of_group)
 230             shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
 231
 232           curr_bit += compr[j + 1];
 233         }
 234
 235       }
 236
 237       {
 238         int *shifts = shift_with_sub;
 239         int n = shift_with_sub_pos;
 240         int highest_shift_wide = 0;
 241         int highest_shift_seq = 0;
 242         int last_shift = 0;
 243
 244         /* If we may not use subs, or we can achive the same with adds,
 245            prefer adds. */
 246         if(!also_use_subs || shift_with_sub_pos >= shift_without_sub_pos) {
 247           shifts = shift_without_sub;
 248           n = shift_without_sub_pos;
 249         }
 250
 251         /* If the number of needed shifts exceeds the given maximum,
 252            use the Mul and exit. */
 253         if(n > maximum_shifts) {
 254 #ifdef DEB
 255           fprintf(stderr, "Only allowed %d shifts, but %d are needed\n",
 256               maximum_shifts, n);
 257 #endif
 258           return irn;
 259         }
 260
 261         /* Compute the highest shift needed for both, the
 262            sequential and wide representations. */
 263         for(i = 0; i < n; i++) {
 264           int curr = abs(shifts[i]);
 265           int curr_seq = curr - last;
 266
 267           highest_shift_wide = curr > highest_shift_wide ? curr
 268             : highest_shift_wide;
 269           highest_shift_seq = curr_seq > highest_shift_seq ? curr_seq
 270             : highest_shift_seq;
 271
 272           last_shift = curr;
 273         }
 274
 275         /* If the highest shift amount is greater than the given limit,
 276            give back the Mul */
 277         if(highest_shift_seq > highest_shift_amount) {
 278 #ifdef DEB
 279           fprintf(stderr, "Shift argument %d exceeds maximum %d\n",
 280               highest_shift_seq, highest_shift_amount);
 281 #endif
 282           return irn;
 283         }
 284
 285         /* If we have subs, we cannot do sequential. */
 286         if(1 /* also_use_subs */) {
 287           if(n > 0) {
 288             ir_node *curr = NULL;
 289
 290             i = n - 1;
 291
 292             do {
 293               int curr_shift = shifts[i];
 294               int sub = curr_shift < 0;
 295               int amount = abs(curr_shift) - 1;
 296               ir_node *aux = operand;
 297
 298
 299               assert(amount >= 0 && "What is a negative shift??");
 300
 301               if(amount != 0) {
 302                 tarval *shift_amount = new_tarval_from_long(amount, mode_Iu);
 303                 ir_node *cnst = new_r_Const(current_ir_graph, block, mode_Iu, shift_amount);
 304                 aux = new_r_Shl(current_ir_graph, block, operand, cnst, mode);
 305               }
 306
 307               if(curr) {
 308                 if(sub)
 309                   curr = new_r_Sub(current_ir_graph, block, curr, aux, mode);
 310                 else
 311                   curr = new_r_Add(current_ir_graph, block, curr, aux, mode);
 312               } else
 313                 curr = aux;
 314
 315             } while(--i >= 0);
 316
 317             res = curr;
 318           }
 319         }
 320
 321 #ifdef DEB
 322         {
 323           const char *prefix = "";
 324           for(i = 0; i < n; i++) {
 325             fprintf(stderr, "%s%d", prefix, shifts[i]);
 326             prefix = ", ";
 327           }
 328           fprintf(stderr, "\n");
 329         }
 330 #endif
 331
 332       }
 333
 334       if(bitstr)
 335         free(bitstr);
 336     }
 337
 338   }
 339
 340   if (res != irn)
 341     stat_arch_dep_replace_mul_with_shifts(irn);
 342
 343   return res;
 344 }
 345
 346 /**
 347  * calculated the ld2 of a tarval if tarval is 2^n, else returns -1.
 348  */
 349 static int tv_ld2(tarval *tv, int bits)
 350 {
 351   int i, k, num;
 352
 353   for (num = i = 0; i < bits; ++i) {
 354     unsigned char v = get_tarval_sub_bits(tv, i);
 355
 356     if (v) {
 357       int j;
 358
 359       for (j = 0; j < 8; ++j)
 360         if ((1 << j) & v) {
 361           ++num;
 362           k = 8 * i + j;
 363         }
 364     }
 365   }
 366   if (num == 1)
 367     return k;
 368   return -1;
 369 }
 370
 371
 372 /* for shorter lines */
 373 #define ABS(a)    tarval_abs(a)
 374 #define NEG(a)    tarval_neg(a)
 375 #define NOT(a)    tarval_not(a)
 376 #define SHL(a, b) tarval_shl(a, b)
 377 #define SHR(a, b) tarval_shr(a, b)
 378 #define ADD(a, b) tarval_add(a, b)
 379 #define SUB(a, b) tarval_sub(a, b)
 380 #define MUL(a, b) tarval_mul(a, b)
 381 #define DIV(a, b) tarval_div(a, b)
 382 #define MOD(a, b) tarval_mod(a, b)
 383 #define CMP(a, b) tarval_cmp(a, b)
 384 #define CNV(a, m) tarval_convert_to(a, m)
 385 #define ONE(m)    get_mode_one(m)
 386 #define ZERO(m)   get_mode_null(m)
 387
 388 struct ms {
 389   tarval *M;        /**< magic number */
 390   int s;            /**< shift amount */
 391   int need_add;     /**< an additional add is needed */
 392   int need_sub;     /**< an additional sub is needed */
 393 };
 394
 395 /**
 396  * Signed division by constant d: calculate the Magic multiplier M and the shift amount s
 397  *
 398  * see Hacker's Delight: 10-6 Integer Division by Constants: Incorporation into a Compiler
 399  */
 400 static struct ms magic(tarval *d)
 401 {
 402   ir_mode *mode   = get_tarval_mode(d);
 403   ir_mode *u_mode = find_unsigned_mode(mode);
 404   int bits        = get_mode_size_bits(u_mode);
 405   int p;
 406   tarval *ad, *anc, *delta, *q1, *r1, *q2, *r2, *t;     /* unsigned */
 407   pnc_number d_cmp, M_cmp;
 408
 409   tarval *bits_minus_1, *two_bits_1;
 410
 411   struct ms mag;
 412
 413   tarval_int_overflow_mode_t rem = tarval_get_integer_overflow_mode();
 414
 415   /* we need overflow mode to work correctly */
 416   tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
 417
 418   /* 2^(bits-1) */
 419   bits_minus_1 = new_tarval_from_long(bits - 1, u_mode);
 420   two_bits_1   = SHL(get_mode_one(u_mode), bits_minus_1);
 421
 422   ad  = CNV(ABS(d), u_mode);
 423   t   = ADD(two_bits_1, SHR(CNV(d, u_mode), bits_minus_1));
 424   anc = SUB(SUB(t, ONE(u_mode)), MOD(t, ad));   /* Absolute value of nc */
 425   p   = bits - 1;                               /* Init: p */
 426   q1  = DIV(two_bits_1, anc);                   /* Init: q1 = 2^p/|nc| */
 427   r1  = SUB(two_bits_1, MUL(q1, anc));          /* Init: r1 = rem(2^p, |nc|) */
 428   q2  = DIV(two_bits_1, ad);                    /* Init: q2 = 2^p/|d| */
 429   r2  = SUB(two_bits_1, MUL(q2, ad));           /* Init: r2 = rem(2^p, |d|) */
 430
 431   do {
 432     ++p;
 433     q1 = ADD(q1, q1);                           /* Update q1 = 2^p/|nc| */
 434     r1 = ADD(r1, r1);                           /* Update r1 = rem(2^p, |nc|) */
 435
 436     if (CMP(r1, anc) & Ge) {
 437       q1 = ADD(q1, ONE(u_mode));
 438       r1 = SUB(r1, anc);
 439     }
 440
 441     q2 = ADD(q2, q2);                           /* Update q2 = 2^p/|d| */
 442     r2 = ADD(r2, r2);                           /* Update r2 = rem(2^p, |d|) */
 443
 444     if (CMP(r2, ad) & Ge) {
 445       q2 = ADD(q2, ONE(u_mode));
 446       r2 = SUB(r2, ad);
 447     }
 448
 449     delta = SUB(ad, r2);
 450   } while (CMP(q1, delta) & Lt || (CMP(q1, delta) & Eq && CMP(r1, ZERO(u_mode)) & Eq));
 451
 452   d_cmp = CMP(d, ZERO(mode));
 453
 454   if (d_cmp & Ge)
 455     mag.M = ADD(CNV(q2, mode), ONE(mode));
 456   else
 457     mag.M = SUB(ZERO(mode), ADD(CNV(q2, mode), ONE(mode)));
 458
 459   M_cmp = CMP(mag.M, ZERO(mode));
 460
 461   mag.s = p - bits;
 462
 463   /* need an add if d > 0 && M < 0 */
 464   mag.need_add = d_cmp & Gt && M_cmp & Lt;
 465
 466   /* need a sub if d < 0 && M > 0 */
 467   mag.need_sub = d_cmp & Lt && M_cmp & Gt;
 468
 469   tarval_set_integer_overflow_mode(rem);
 470
 471   return mag;
 472 }
 473
 474 struct mu {
 475   tarval *M;        /**< magic add constant */
 476   int s;            /**< shift amount */
 477   int need_add;     /**< add indicator */
 478 };
 479
 480 /**
 481  * Unsigned division by constant d: calculate the Magic multiplier M and the shift amount s
 482  *
 483  * see Hacker's Delight: 10-10 Integer Division by Constants: Incorporation into a Compiler (Unsigned)
 484  */
 485 static struct mu magicu(tarval *d)
 486 {
 487   ir_mode *mode   = get_tarval_mode(d);
 488   int bits        = get_mode_size_bits(mode);
 489   int p;
 490   tarval *nc, *delta, *q1, *r1, *q2, *r2;
 491   tarval *bits_minus_1, *two_bits_1, *seven_ff;
 492
 493   struct mu magu;
 494
 495   tarval_int_overflow_mode_t rem = tarval_get_integer_overflow_mode();
 496
 497   /* we need overflow mode to work correctly */
 498   tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
 499
 500   bits_minus_1 = new_tarval_from_long(bits - 1, mode);
 501   two_bits_1   = SHL(get_mode_one(mode), bits_minus_1);
 502   seven_ff     = SUB(two_bits_1, ONE(mode));
 503
 504   magu.need_add = 0;                            /* initialize the add indicator */
 505   nc = SUB(NEG(ONE(mode)), MOD(NEG(d), d));
 506   p  = bits - 1;                                /* Init: p */
 507   q1 = DIV(two_bits_1, nc);                     /* Init: q1 = 2^p/nc */
 508   r1 = SUB(two_bits_1, MUL(q1, nc));            /* Init: r1 = rem(2^p, nc) */
 509   q2 = DIV(seven_ff, d);                        /* Init: q2 = (2^p - 1)/d */
 510   r2 = SUB(seven_ff, MUL(q2, d));               /* Init: r2 = rem(2^p - 1, d) */
 511
 512   do {
 513     ++p;
 514     if (CMP(r1, SUB(nc, r1)) & Ge) {
 515       q1 = ADD(ADD(q1, q1), ONE(mode));
 516       r1 = SUB(ADD(r1, r1), nc);
 517     }
 518     else {
 519       q1 = ADD(q1, q1);
 520       r1 = ADD(r1, r1);
 521     }
 522
 523     if (CMP(ADD(r2, ONE(mode)), SUB(d, r2)) & Ge) {
 524       if (CMP(q2, seven_ff) & Ge)
 525         magu.need_add = 1;
 526
 527       q2 = ADD(ADD(q2, q2), ONE(mode));
 528       r2 = SUB(ADD(ADD(r2, r2), ONE(mode)), d);
 529     }
 530     else {
 531       if (CMP(q2, two_bits_1) & Ge)
 532         magu.need_add = 1;
 533
 534       q2 = ADD(q2, q2);
 535       r2 = ADD(ADD(r2, r2), ONE(mode));
 536     }
 537     delta = SUB(SUB(d, ONE(mode)), r2);
 538   } while (p < 2*bits &&
 539           (CMP(q1, delta) & Lt || (CMP(q1, delta) & Eq && CMP(r1, ZERO(mode)) & Eq)));
 540
 541   magu.M = ADD(q2, ONE(mode));                       /* Magic number */
 542   magu.s = p - bits;                                 /* and shift amount */
 543
 544   tarval_set_integer_overflow_mode(rem);
 545
 546   return magu;
 547 }
 548
 549 /**
 550  * build the Mulh replacement code for n / tv
 551  *
 552  * Note thet 'div' might be a mod or DivMod operation as well
 553  */
 554 static ir_node *replace_div_by_mulh(ir_node *div, tarval *tv)
 555 {
 556   dbg_info *dbg  = get_irn_dbg_info(div);
 557   ir_node *n     = get_binop_left(div);
 558   ir_node *block = get_nodes_block(div);
 559   ir_mode *mode  = get_irn_mode(n);
 560   int bits       = get_mode_size_bits(mode);
 561   ir_node *q, *t, *c;
 562
 563   /* Beware: do not transform bad code */
 564   if (is_Bad(n) || is_Bad(block))
 565     return div;
 566
 567   if (mode_is_signed(mode)) {
 568     struct ms mag = magic(tv);
 569
 570     /* generate the Mulh instruction */
 571     c = new_r_Const(current_ir_graph, block, mode, mag.M);
 572     q = new_rd_Mulh(dbg, current_ir_graph, block, n, c, mode);
 573
 574     /* do we need an Add or Sub */
 575     if (mag.need_add)
 576       q = new_rd_Add(dbg, current_ir_graph, block, q, n, mode);
 577     else if (mag.need_sub)
 578       q = new_rd_Sub(dbg, current_ir_graph, block, q, n, mode);
 579
 580     /* Do we need the shift */
 581     if (mag.s > 0) {
 582       c = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(mag.s, mode_Iu));
 583       q    = new_rd_Shrs(dbg, current_ir_graph, block, q, c, mode);
 584     }
 585
 586     /* final */
 587     c = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(bits-1, mode_Iu));
 588     t = new_rd_Shr(dbg, current_ir_graph, block, q, c, mode);
 589
 590     q = new_rd_Add(dbg, current_ir_graph, block, q, t, mode);
 591   }
 592   else {
 593     struct mu mag = magicu(tv);
 594     ir_node *c;
 595
 596     /* generate the Mulh instruction */
 597     c = new_r_Const(current_ir_graph, block, mode, mag.M);
 598     q    = new_rd_Mulh(dbg, current_ir_graph, block, n, c, mode);
 599
 600     if (mag.need_add) {
 601       if (mag.s > 0) {
 602         /* use the GM scheme */
 603         t = new_rd_Sub(dbg, current_ir_graph, block, n, q, mode);
 604
 605         c = new_r_Const(current_ir_graph, block, mode_Iu, get_mode_one(mode_Iu));
 606         t = new_rd_Shr(dbg, current_ir_graph, block, t, c, mode);
 607
 608         t = new_rd_Add(dbg, current_ir_graph, block, t, q, mode);
 609
 610         c = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(mag.s-1, mode_Iu));
 611         q = new_rd_Shr(dbg, current_ir_graph, block, t, c, mode);
 612       }
 613       else {
 614         /* use the default scheme */
 615         q = new_rd_Add(dbg, current_ir_graph, block, q, n, mode);
 616       }
 617     }
 618     else if (mag.s > 0) { /* default scheme, shift needed */
 619       c = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(mag.s, mode_Iu));
 620       q = new_rd_Shr(dbg, current_ir_graph, block, q, c, mode);
 621     }
 622   }
 623   return q;
 624 }
 625
 626 ir_node *arch_dep_replace_div_by_const(ir_node *irn)
 627 {
 628   ir_node *res  = irn;
 629
 630   /* If the architecture dependent optimizations were not initialized
 631      or this optimization was not enabled. */
 632   if (params == NULL || (opts & arch_dep_div_by_const) == 0)
 633     return irn;
 634
 635   if (get_irn_opcode(irn) == iro_Div) {
 636     ir_node *c = get_Div_right(irn);
 637     ir_node *block, *left;
 638     ir_mode *mode;
 639     tarval *tv, *ntv;
 640     dbg_info *dbg;
 641     int n, bits;
 642     int k, n_flag;
 643
 644     if (get_irn_op(c) != op_Const)
 645       return irn;
 646
 647     left  = get_Div_left(irn);
 648     mode  = get_irn_mode(left);
 649     block = get_nodes_block(irn);
 650     dbg   = get_irn_dbg_info(irn);
 651     tv    = get_Const_tarval(c);
 652
 653     bits = get_mode_size_bits(mode);
 654     n    = (bits + 7) / 8;
 655
 656     k = -1;
 657     if (mode_is_signed(mode)) {
 658       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 659       ntv = tarval_neg(tv);
 660       n_flag = 1;
 661       k = tv_ld2(ntv, n);
 662     }
 663
 664     if (k < 0) {
 665       n_flag = 0;
 666       k = tv_ld2(tv, n);
 667     }
 668
 669     if (k >= 0) { /* division by 2^k or -2^k */
 670       if (mode_is_signed(mode)) {
 671         ir_node *k_node;
 672         ir_node *curr = left;
 673
 674         if (k != 1) {
 675           k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k - 1, mode_Iu));
 676           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 677         }
 678
 679         k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(bits - k, mode_Iu));
 680         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 681
 682         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 683
 684         k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k, mode_Iu));
 685         res    = new_rd_Shrs(dbg, current_ir_graph, block, curr, k_node, mode);
 686
 687         if (n_flag) { /* negate the result */
 688           ir_node *k_node;
 689
 690           k_node = new_r_Const(current_ir_graph, block, mode, get_mode_null(mode));
 691           res = new_rd_Sub(dbg, current_ir_graph, block, k_node, res, mode);
 692         }
 693       }
 694       else {      /* unsigned case */
 695         ir_node *k_node;
 696
 697         k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k, mode_Iu));
 698         res    = new_rd_Shr(dbg, current_ir_graph, block, left, k_node, mode);
 699       }
 700     }
 701     else {
 702       /* other constant */
 703       if (allow_Mulh(mode))
 704         res = replace_div_by_mulh(irn, tv);
 705     }
 706   }
 707
 708   if (res != irn)
 709     stat_arch_dep_replace_div_by_const(irn);
 710
 711   return res;
 712 }
 713
 714 ir_node *arch_dep_replace_mod_by_const(ir_node *irn)
 715 {
 716   ir_node *res  = irn;
 717
 718   /* If the architecture dependent optimizations were not initialized
 719      or this optimization was not enabled. */
 720   if (params == NULL || (opts & arch_dep_mod_by_const) == 0)
 721     return irn;
 722
 723   if (get_irn_opcode(irn) == iro_Mod) {
 724     ir_node *c = get_Mod_right(irn);
 725     ir_node *block, *left;
 726     ir_mode *mode;
 727     tarval *tv, *ntv;
 728     dbg_info *dbg;
 729     int n, bits;
 730     int k;
 731
 732     if (get_irn_op(c) != op_Const)
 733       return irn;
 734
 735     left  = get_Mod_left(irn);
 736     mode  = get_irn_mode(left);
 737     block = get_nodes_block(irn);
 738     dbg   = get_irn_dbg_info(irn);
 739     tv    = get_Const_tarval(c);
 740
 741     bits = get_mode_size_bits(mode);
 742     n    = (bits + 7) / 8;
 743
 744     k = -1;
 745     if (mode_is_signed(mode)) {
 746       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 747       ntv = tarval_neg(tv);
 748       k = tv_ld2(ntv, n);
 749     }
 750
 751     if (k < 0) {
 752       k = tv_ld2(tv, n);
 753     }
 754
 755     if (k >= 0) {
 756       /* division by 2^k or -2^k:
 757        * we use "modulus" here, so x % y == x % -y that's why is no difference between the case 2^k and -2^k
 758        */
 759       if (mode_is_signed(mode)) {
 760         ir_node *k_node;
 761         ir_node *curr = left;
 762
 763         if (k != 1) {
 764           k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k - 1, mode_Iu));
 765           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 766         }
 767
 768         k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(bits - k, mode_Iu));
 769         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 770
 771         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 772
 773         k_node = new_r_Const(current_ir_graph, block, mode, new_tarval_from_long((-1) << k, mode));
 774         curr   = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
 775
 776         res    = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
 777       }
 778       else {      /* unsigned case */
 779         ir_node *k_node;
 780
 781         k_node = new_r_Const(current_ir_graph, block, mode, new_tarval_from_long((1 << k) - 1, mode));
 782         res    = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
 783       }
 784     }
 785     else {
 786       /* other constant */
 787       if (allow_Mulh(mode)) {
 788         res = replace_div_by_mulh(irn, tv);
 789
 790         res = new_rd_Mul(dbg, current_ir_graph, block, res, c, mode);
 791
 792         /* res = arch_dep_mul_to_shift(res); */
 793
 794         res = new_rd_Sub(dbg, current_ir_graph, block, left, res, mode);
 795       }
 796     }
 797   }
 798
 799   if (res != irn)
 800     stat_arch_dep_replace_mod_by_const(irn);
 801
 802   return res;
 803 }
 804
 805 void arch_dep_replace_divmod_by_const(ir_node **div, ir_node **mod, ir_node *irn)
 806 {
 807   *div = *mod = NULL;
 808
 809   /* If the architecture dependent optimizations were not initialized
 810      or this optimization was not enabled. */
 811   if (params == NULL ||
 812       ((opts & (arch_dep_div_by_const|arch_dep_mod_by_const)) != (arch_dep_div_by_const|arch_dep_mod_by_const)))
 813     return;
 814
 815   if (get_irn_opcode(irn) == iro_DivMod) {
 816     ir_node *c = get_DivMod_right(irn);
 817     ir_node *block, *left;
 818     ir_mode *mode;
 819     tarval *tv, *ntv;
 820     dbg_info *dbg;
 821     int n, bits;
 822     int k, n_flag;
 823
 824     if (get_irn_op(c) != op_Const)
 825       return;
 826
 827     left  = get_DivMod_left(irn);
 828     mode  = get_irn_mode(left);
 829     block = get_nodes_block(irn);
 830     dbg   = get_irn_dbg_info(irn);
 831     tv    = get_Const_tarval(c);
 832
 833     bits = get_mode_size_bits(mode);
 834     n    = (bits + 7) / 8;
 835
 836     k = -1;
 837     if (mode_is_signed(mode)) {
 838       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 839       ntv = tarval_neg(tv);
 840       n_flag = 1;
 841       k = tv_ld2(ntv, n);
 842     }
 843
 844     if (k < 0) {
 845       n_flag = 0;
 846       k = tv_ld2(tv, n);
 847     }
 848
 849     if (k >= 0) { /* division by 2^k or -2^k */
 850       if (mode_is_signed(mode)) {
 851         ir_node *k_node, *c_k;
 852         ir_node *curr = left;
 853
 854         if (k != 1) {
 855           k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k - 1, mode_Iu));
 856           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 857         }
 858
 859         k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(bits - k, mode_Iu));
 860         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 861
 862         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 863
 864         c_k    = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k, mode_Iu));
 865
 866         *div   = new_rd_Shrs(dbg, current_ir_graph, block, curr, c_k, mode);
 867
 868         if (n_flag) { /* negate the div result */
 869           ir_node *k_node;
 870
 871           k_node = new_r_Const(current_ir_graph, block, mode, get_mode_null(mode));
 872           *div = new_rd_Sub(dbg, current_ir_graph, block, k_node, *div, mode);
 873         }
 874
 875         k_node = new_r_Const(current_ir_graph, block, mode, new_tarval_from_long((-1) << k, mode));
 876         curr   = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
 877
 878         *mod   = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
 879       }
 880       else {      /* unsigned case */
 881         ir_node *k_node;
 882
 883         k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k, mode_Iu));
 884         *div   = new_rd_Shr(dbg, current_ir_graph, block, left, k_node, mode);
 885
 886         k_node = new_r_Const(current_ir_graph, block, mode, new_tarval_from_long((1 << k) - 1, mode));
 887         *mod   = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
 888       }
 889     }
 890     else {
 891       /* other constant */
 892       if (allow_Mulh(mode)) {
 893         ir_node *t;
 894
 895         *div = replace_div_by_mulh(irn, tv);
 896
 897         t    = new_rd_Mul(dbg, current_ir_graph, block, *div, c, mode);
 898
 899         /* t = arch_dep_mul_to_shift(t); */
 900
 901         *mod = new_rd_Sub(dbg, current_ir_graph, block, left, t, mode);
 902       }
 903     }
 904   }
 905
 906   if (*div)
 907     stat_arch_dep_replace_DivMod_by_const(irn);
 908 }
 909
 910
 911 static const arch_dep_params_t default_params = {
 912   1,  /* also use subs */
 913   4,  /* maximum shifts */
 914   31, /* maximum shift amount */
 915
 916   0,  /* allow Mulhs */
 917   0,  /* allow Mulus */
 918   32  /* Mulh allowed up to 32 bit */
 919 };
 920
 921 const arch_dep_params_t *arch_dep_default_factory(void) {
 922   return &default_params;
 923 }