nsz Git - libfirm/blob - ir/ir/irarch.c

   1 /**
   2  * @file irarch.c
   3  * @date 28.9.2004
   4  * @author Sebastian Hack
   5  * @brief Machine dependent firm optimizations.
   6  *
   7  * $Id$
   8  */
   9 #include <stdlib.h>
  10 #include <assert.h>
  11
  12 #include "irnode_t.h"
  13 #include "irgraph_t.h"
  14 #include "irmode_t.h"
  15 #include "iropt_t.h"
  16 #include "ircons_t.h"
  17 #include "irgmod.h"
  18 #include "irvrfy.h"
  19 #include "tv.h"
  20 #include "dbginfo_t.h"
  21 #include "iropt_dbg.h"
  22 #include "irflag_t.h"
  23 #include "firmstat.h"
  24 #include "ircons.h"
  25 #include "irarch.h"
  26 #include "firmstat.h"
  27
  28 #undef DEB
  29
  30 #define MAX_BITSTR 64
  31
  32 /* when we need verifying */
  33 #ifdef NDEBUG
  34 # define IRN_VRFY_IRG(res, irg)
  35 #else
  36 # define IRN_VRFY_IRG(res, irg)  irn_vrfy_irg(res, irg)
  37 #endif
  38
  39 /** The params got from the factory in arch_dep_init(...). */
  40 static const arch_dep_params_t *params = NULL;
  41
  42 /** The bit mask, which optimizations to apply. */
  43 static arch_dep_opts_t opts;
  44
  45 /* we need this new pseudo op */
  46 static ir_op *op_Mulh = NULL;
  47
  48 /**
  49  * construct a Mulh: Mulh(a,b) = (a * b) >> w, w is the with in bits of a, b
  50  */
  51 static ir_node *
  52 new_rd_Mulh (dbg_info *db, ir_graph *irg, ir_node *block,
  53        ir_node *op1, ir_node *op2, ir_mode *mode)
  54 {
  55   ir_node *in[2];
  56   ir_node *res;
  57
  58   if (! op_Mulh) {
  59     op_Mulh = new_ir_op(get_next_ir_opcode(), "Mulh",  op_pin_state_floats, irop_flag_commutative, oparity_binary, 0, 0);
  60   }
  61
  62   in[0] = op1;
  63   in[1] = op2;
  64   res = new_ir_node(db, irg, block, op_Mulh, mode, 2, in);
  65   res = optimize_node(res);
  66   IRN_VRFY_IRG(res, irg);
  67   return res;
  68 }
  69
  70 ir_op *get_op_Mulh(void)  { return op_Mulh; }
  71
  72 void arch_dep_init(arch_dep_params_factory_t factory)
  73 {
  74   opts = arch_dep_none;
  75
  76   if (factory != NULL)
  77     params = factory();
  78
  79   if (params && (opts & (arch_dep_div_by_const|arch_dep_mod_by_const))) {
  80     if (! op_Mulh) {
  81       /* create the Mulh operation */
  82       op_Mulh = new_ir_op(get_next_ir_opcode(), "Mulh",  op_pin_state_floats, irop_flag_commutative, oparity_binary, 0, 0);
  83     }
  84   }
  85 }
  86
  87 void arch_dep_set_opts(arch_dep_opts_t the_opts) {
  88   opts = the_opts;
  89 }
  90
  91 ir_node *arch_dep_replace_mul_with_shifts(ir_node *irn)
  92 {
  93   ir_node *res = irn;
  94   ir_mode *mode = get_irn_mode(irn);
  95
  96   /* If the architecture dependent optimizations were not initialized
  97      or this optimization was not enabled. */
  98   if(params == NULL || (opts & arch_dep_mul_to_shift) == 0)
  99     return irn;
 100
 101   if(get_irn_opcode(irn) == iro_Mul && mode_is_int(mode)) {
 102     ir_node *block   = get_nodes_block(irn);
 103     ir_node *left    = get_binop_left(irn);
 104     ir_node *right   = get_binop_right(irn);
 105     tarval *tv       = NULL;
 106     ir_node *operand = NULL;
 107
 108     /* Look, if one operand is a constant. */
 109     if(get_irn_opcode(left) == iro_Const) {
 110       tv = get_Const_tarval(left);
 111       operand = right;
 112     } else if(get_irn_opcode(right) == iro_Const) {
 113       tv = get_Const_tarval(right);
 114       operand = left;
 115     }
 116
 117     if(tv != NULL) {
 118       int maximum_shifts = params->maximum_shifts;
 119       int also_use_subs = params->also_use_subs;
 120       int highest_shift_amount = params->highest_shift_amount;
 121
 122       char *bitstr = get_tarval_bitpattern(tv);
 123       char *p;
 124       int i, last = 0;
 125       int counter = 0;
 126       int curr_bit;
 127       int compr_len = 0;
 128       char compr[MAX_BITSTR];
 129
 130       int singleton;
 131       int end_of_group;
 132       int shift_with_sub[MAX_BITSTR] = { 0 };
 133       int shift_without_sub[MAX_BITSTR] = { 0 };
 134       int shift_with_sub_pos = 0;
 135       int shift_without_sub_pos = 0;
 136
 137 #if DEB
 138       {
 139         long val = get_tarval_long(tv);
 140         fprintf(stderr, "Found mul with %ld(%lx) = ", val, val);
 141         for(p = bitstr; *p != '\0'; p++)
 142           printf("%c", *p);
 143         printf("\n");
 144       }
 145 #endif
 146
 147       for(p = bitstr; *p != '\0'; p++) {
 148         int bit = *p != '0';
 149
 150         if (bit != last) {
 151           /* The last was 1 we are now at 0 OR
 152            * The last was 0 and we are now at 1 */
 153           compr[compr_len++] = counter;
 154           counter = 1;
 155         }
 156         else
 157           counter++;
 158
 159         last = bit;
 160       }
 161       compr[compr_len++] = counter;
 162
 163
 164 #ifdef DEB
 165       {
 166         const char *prefix = "";
 167         for(i = 0; i < compr_len; i++, prefix = ",")
 168           fprintf(stderr, "%s%d", prefix, compr[i]);
 169         fprintf("\n");
 170       }
 171 #endif
 172
 173       // Go over all recorded one groups.
 174       curr_bit = compr[0];
 175
 176       for(i = 1; i < compr_len; i = end_of_group + 2) {
 177         int j, zeros_in_group, ones_in_group;
 178
 179         ones_in_group = compr[i];
 180         zeros_in_group = 0;
 181
 182         // Scan for singular 0s in a sequence
 183         for(j = i + 1; j < compr_len && compr[j] == 1; j += 2) {
 184           zeros_in_group += 1;
 185           ones_in_group += (j + 1 < compr_len ? compr[j + 1] : 0);
 186         }
 187         end_of_group = j - 1;
 188
 189         if(zeros_in_group >= ones_in_group - 1)
 190           end_of_group = i;
 191
 192 #ifdef DEB
 193         fprintf(stderr, "  i:%d, eg:%d\n", i, end_of_group);
 194 #endif
 195
 196         singleton = compr[i] == 1 && i == end_of_group;
 197         for(j = i; j <= end_of_group; j += 2) {
 198           int curr_ones = compr[j];
 199           int biased_curr_bit = curr_bit + 1;
 200           int k;
 201
 202 #ifdef DEB
 203           fprintf(stderr, "    j:%d, ones:%d\n", j, curr_ones);
 204 #endif
 205
 206           // If this ones group is a singleton group (it has no
 207           // singleton zeros inside
 208           if(singleton)
 209             shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
 210           else if(j == i)
 211             shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
 212
 213           for(k = 0; k < curr_ones; k++)
 214             shift_without_sub[shift_without_sub_pos++] = biased_curr_bit + k;
 215
 216           curr_bit += curr_ones;
 217           biased_curr_bit = curr_bit + 1;
 218
 219           if(!singleton && j == end_of_group)
 220             shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
 221           else if(j != end_of_group)
 222             shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
 223
 224           curr_bit += compr[j + 1];
 225         }
 226
 227       }
 228
 229       {
 230         int *shifts = shift_with_sub;
 231         int n = shift_with_sub_pos;
 232         int highest_shift_wide = 0;
 233         int highest_shift_seq = 0;
 234         int last_shift = 0;
 235
 236         /* If we may not use subs, or we can achive the same with adds,
 237            prefer adds. */
 238         if(!also_use_subs || shift_with_sub_pos >= shift_without_sub_pos) {
 239           shifts = shift_without_sub;
 240           n = shift_without_sub_pos;
 241         }
 242
 243         /* If the number of needed shifts exceeds the given maximum,
 244            use the Mul and exit. */
 245         if(n > maximum_shifts) {
 246 #ifdef DEB
 247           fprintf(stderr, "Only allowed %d shifts, but %d are needed\n",
 248               maximum_shifts, n);
 249 #endif
 250           return irn;
 251         }
 252
 253         /* Compute the highest shift needed for both, the
 254            sequential and wide representations. */
 255         for(i = 0; i < n; i++) {
 256           int curr = abs(shifts[i]);
 257           int curr_seq = curr - last;
 258
 259           highest_shift_wide = curr > highest_shift_wide ? curr
 260             : highest_shift_wide;
 261           highest_shift_seq = curr_seq > highest_shift_seq ? curr_seq
 262             : highest_shift_seq;
 263
 264           last_shift = curr;
 265         }
 266
 267         /* If the highest shift amount is greater than the given limit,
 268            give back the Mul */
 269         if(highest_shift_seq > highest_shift_amount) {
 270 #ifdef DEB
 271           fprintf(stderr, "Shift argument %d exceeds maximum %d\n",
 272               highest_shift_seq, highest_shift_amount);
 273 #endif
 274           return irn;
 275         }
 276
 277         /* If we have subs, we cannot do sequential. */
 278         if(1 /* also_use_subs */) {
 279           if(n > 0) {
 280             ir_node *curr = NULL;
 281
 282             i = n - 1;
 283
 284             do {
 285               int curr_shift = shifts[i];
 286               int sub = curr_shift < 0;
 287               int amount = abs(curr_shift) - 1;
 288               ir_node *aux = operand;
 289
 290
 291               assert(amount >= 0 && "What is a negative shift??");
 292
 293               if(amount != 0) {
 294                 tarval *shift_amount = new_tarval_from_long(amount, mode_Iu);
 295                 ir_node *cnst = new_r_Const(current_ir_graph, block, mode_Iu, shift_amount);
 296                 aux = new_r_Shl(current_ir_graph, block, operand, cnst, mode);
 297               }
 298
 299               if(curr) {
 300                 if(sub)
 301                   curr = new_r_Sub(current_ir_graph, block, curr, aux, mode);
 302                 else
 303                   curr = new_r_Add(current_ir_graph, block, curr, aux, mode);
 304               } else
 305                 curr = aux;
 306
 307             } while(--i >= 0);
 308
 309             res = curr;
 310           }
 311         }
 312
 313 #ifdef DEB
 314         {
 315           const char *prefix = "";
 316           for(i = 0; i < n; i++) {
 317             fprintf(stderr, "%s%d", prefix, shifts[i]);
 318             prefix = ", ";
 319           }
 320           fprintf(stderr, "\n");
 321         }
 322 #endif
 323
 324       }
 325
 326       if(bitstr)
 327         free(bitstr);
 328     }
 329
 330   }
 331
 332   if (res != irn)
 333     stat_arch_dep_replace_mul_with_shifts(irn);
 334
 335   return res;
 336 }
 337
 338 /**
 339  * calculated the ld2 of a tarval if tarval is 2^n, else returns -1.
 340  */
 341 static int tv_ld2(tarval *tv, int bits)
 342 {
 343   int i, k, num;
 344
 345   for (num = i = 0; i < bits; ++i) {
 346     unsigned char v = get_tarval_sub_bits(tv, i);
 347
 348     if (v) {
 349       int j;
 350
 351       for (j = 0; j < 8; ++j)
 352         if ((1 << j) & v) {
 353           ++num;
 354           k = 8 * i + j;
 355         }
 356     }
 357   }
 358   if (num == 1)
 359     return k;
 360   return -1;
 361 }
 362
 363
 364 /* for shorter lines */
 365 #define ABS(a)    tarval_abs(a)
 366 #define NEG(a)    tarval_neg(a)
 367 #define NOT(a)    tarval_not(a)
 368 #define SHL(a, b) tarval_shl(a, b)
 369 #define SHR(a, b) tarval_shr(a, b)
 370 #define ADD(a, b) tarval_add(a, b)
 371 #define SUB(a, b) tarval_sub(a, b)
 372 #define MUL(a, b) tarval_mul(a, b)
 373 #define DIV(a, b) tarval_div(a, b)
 374 #define MOD(a, b) tarval_mod(a, b)
 375 #define CMP(a, b) tarval_cmp(a, b)
 376 #define CNV(a, m) tarval_convert_to(a, m)
 377 #define ONE(m)    get_mode_one(m)
 378 #define ZERO(m)   get_mode_null(m)
 379
 380 struct ms {
 381   tarval *M;        /**< magic number */
 382   int s;            /**< shift amount */
 383   int need_add;     /**< an additional add is needed */
 384   int need_sub;     /**< an additional sub is needed */
 385 };
 386
 387 /**
 388  * Signed division by constant d: calculate the Magic multiplier M and the shift amount s
 389  *
 390  * see Hacker's Delight: 10-6 Integer Division by Constants: Incorporation into a Compiler
 391  */
 392 static struct ms magic(tarval *d)
 393 {
 394   ir_mode *mode   = get_tarval_mode(d);
 395   ir_mode *u_mode = find_unsigned_mode(mode);
 396   int bits        = get_mode_size_bits(u_mode);
 397   int p;
 398   tarval *ad, *anc, *delta, *q1, *r1, *q2, *r2, *t;     /* unsigned */
 399   pnc_number d_cmp, M_cmp;
 400
 401   tarval *bits_minus_1, *two_bits_1;
 402
 403   struct ms mag;
 404
 405   tarval_int_overflow_mode_t rem = tarval_get_integer_overflow_mode();
 406
 407   /* we need overflow mode to work correctly */
 408   tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
 409
 410   /* 2^(bits-1) */
 411   bits_minus_1 = new_tarval_from_long(bits - 1, u_mode);
 412   two_bits_1   = SHL(get_mode_one(u_mode), bits_minus_1);
 413
 414   ad  = CNV(ABS(d), u_mode);
 415   t   = ADD(two_bits_1, SHR(CNV(d, u_mode), bits_minus_1));
 416   anc = SUB(SUB(t, ONE(u_mode)), MOD(t, ad));   /* Absolute value of nc */
 417   p   = bits - 1;                               /* Init: p */
 418   q1  = DIV(two_bits_1, anc);                   /* Init: q1 = 2^p/|nc| */
 419   r1  = SUB(two_bits_1, MUL(q1, anc));          /* Init: r1 = rem(2^p, |nc|) */
 420   q2  = DIV(two_bits_1, ad);                    /* Init: q2 = 2^p/|d| */
 421   r2  = SUB(two_bits_1, MUL(q2, ad));           /* Init: r2 = rem(2^p, |d|) */
 422
 423   do {
 424     ++p;
 425     q1 = ADD(q1, q1);                           /* Update q1 = 2^p/|nc| */
 426     r1 = ADD(r1, r1);                           /* Update r1 = rem(2^p, |nc|) */
 427
 428     if (CMP(r1, anc) & Ge) {
 429       q1 = ADD(q1, ONE(u_mode));
 430       r1 = SUB(r1, anc);
 431     }
 432
 433     q2 = ADD(q2, q2);                           /* Update q2 = 2^p/|d| */
 434     r2 = ADD(r2, r2);                           /* Update r2 = rem(2^p, |d|) */
 435
 436     if (CMP(r2, ad) & Ge) {
 437       q2 = ADD(q2, ONE(u_mode));
 438       r2 = SUB(r2, ad);
 439     }
 440
 441     delta = SUB(ad, r2);
 442   } while (CMP(q1, delta) & Lt || (CMP(q1, delta) & Eq && CMP(r1, ZERO(u_mode)) & Eq));
 443
 444   d_cmp = CMP(d, ZERO(mode));
 445
 446   if (d_cmp & Ge)
 447     mag.M = ADD(CNV(q2, mode), ONE(mode));
 448   else
 449     mag.M = SUB(ZERO(mode), ADD(CNV(q2, mode), ONE(mode)));
 450
 451   M_cmp = CMP(mag.M, ZERO(mode));
 452
 453   mag.s = p - bits;
 454
 455   /* need an add if d > 0 && M < 0 */
 456   mag.need_add = d_cmp & Gt && M_cmp & Lt;
 457
 458   /* need a sub if d < 0 && M > 0 */
 459   mag.need_sub = d_cmp & Lt && M_cmp & Gt;
 460
 461   tarval_set_integer_overflow_mode(rem);
 462
 463   return mag;
 464 }
 465
 466 struct mu {
 467   tarval *M;        /**< magic add constant */
 468   int s;            /**< shift amount */
 469   int need_add;     /**< add indicator */
 470 };
 471
 472 /**
 473  * Unsigned division by constant d: calculate the Magic multiplier M and the shift amount s
 474  *
 475  * see Hacker's Delight: 10-10 Integer Division by Constants: Incorporation into a Compiler (Unsigned)
 476  */
 477 static struct mu magicu(tarval *d)
 478 {
 479   ir_mode *mode   = get_tarval_mode(d);
 480   int bits        = get_mode_size_bits(mode);
 481   int p;
 482   tarval *nc, *delta, *q1, *r1, *q2, *r2;
 483   tarval *bits_minus_1, *two_bits_1, *seven_ff;
 484
 485   struct mu magu;
 486
 487   tarval_int_overflow_mode_t rem = tarval_get_integer_overflow_mode();
 488
 489   /* we need overflow mode to work correctly */
 490   tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
 491
 492   bits_minus_1 = new_tarval_from_long(bits - 1, mode);
 493   two_bits_1   = SHL(get_mode_one(mode), bits_minus_1);
 494   seven_ff     = SUB(two_bits_1, ONE(mode));
 495
 496   magu.need_add = 0;                            /* initialize the add indicator */
 497   nc = SUB(NEG(ONE(mode)), MOD(NEG(d), d));
 498   p  = bits - 1;                                /* Init: p */
 499   q1 = DIV(two_bits_1, nc);                     /* Init: q1 = 2^p/nc */
 500   r1 = SUB(two_bits_1, MUL(q1, nc));            /* Init: r1 = rem(2^p, nc) */
 501   q2 = DIV(seven_ff, d);                        /* Init: q2 = (2^p - 1)/d */
 502   r2 = SUB(seven_ff, MUL(q2, d));               /* Init: r2 = rem(2^p - 1, d) */
 503
 504   do {
 505     ++p;
 506     if (CMP(r1, SUB(nc, r1)) & Ge) {
 507       q1 = ADD(ADD(q1, q1), ONE(mode));
 508       r1 = SUB(ADD(r1, r1), nc);
 509     }
 510     else {
 511       q1 = ADD(q1, q1);
 512       r1 = ADD(r1, r1);
 513     }
 514
 515     if (CMP(ADD(r2, ONE(mode)), SUB(d, r2)) & Ge) {
 516       if (CMP(q2, seven_ff) & Ge)
 517         magu.need_add = 1;
 518
 519       q2 = ADD(ADD(q2, q2), ONE(mode));
 520       r2 = SUB(ADD(ADD(r2, r2), ONE(mode)), d);
 521     }
 522     else {
 523       if (CMP(q2, two_bits_1) & Ge)
 524         magu.need_add = 1;
 525
 526       q2 = ADD(q2, q2);
 527       r2 = ADD(ADD(r2, r2), ONE(mode));
 528     }
 529     delta = SUB(SUB(d, ONE(mode)), r2);
 530   } while (p < 2*bits &&
 531           (CMP(q1, delta) & Lt || (CMP(q1, delta) & Eq && CMP(r1, ZERO(mode)) & Eq)));
 532
 533   magu.M = ADD(q2, ONE(mode));                       /* Magic number */
 534   magu.s = p - bits;                                 /* and shift amount */
 535
 536   tarval_set_integer_overflow_mode(rem);
 537
 538   return magu;
 539 }
 540
 541 /**
 542  * build the Mulh replacement code for n / tv
 543  *
 544  * Note thet 'div' might be a mod or DivMod operation as well
 545  */
 546 static ir_node *replace_div_by_mulh(ir_node *div, tarval *tv)
 547 {
 548   dbg_info *dbg  = get_irn_dbg_info(div);
 549   ir_node *n     = get_binop_left(div);
 550   ir_node *block = get_nodes_block(div);
 551   ir_mode *mode  = get_irn_mode(n);
 552   int bits       = get_mode_size_bits(mode);
 553   ir_node *q, *t, *c;
 554
 555   /* Beware: do not transform bad code */
 556   if (is_Bad(n) || is_Bad(block))
 557     return div;
 558
 559   if (mode_is_signed(mode)) {
 560     struct ms mag = magic(tv);
 561
 562     /* generate the Mulh instruction */
 563     c = new_r_Const(current_ir_graph, block, mode, mag.M);
 564     q = new_rd_Mulh(dbg, current_ir_graph, block, n, c, mode);
 565
 566     /* do we need an Add or Sub */
 567     if (mag.need_add)
 568       q = new_rd_Add(dbg, current_ir_graph, block, q, n, mode);
 569     else if (mag.need_sub)
 570       q = new_rd_Sub(dbg, current_ir_graph, block, q, n, mode);
 571
 572     /* Do we need the shift */
 573     if (mag.s > 0) {
 574       c = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(mag.s, mode_Iu));
 575       q    = new_rd_Shrs(dbg, current_ir_graph, block, q, c, mode);
 576     }
 577
 578     /* final */
 579     c = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(bits-1, mode_Iu));
 580     t = new_rd_Shr(dbg, current_ir_graph, block, q, c, mode);
 581
 582     q = new_rd_Add(dbg, current_ir_graph, block, q, t, mode);
 583   }
 584   else {
 585     struct mu mag = magicu(tv);
 586     ir_node *c;
 587
 588     /* generate the Mulh instruction */
 589     c = new_r_Const(current_ir_graph, block, mode, mag.M);
 590     q    = new_rd_Mulh(dbg, current_ir_graph, block, n, c, mode);
 591
 592     if (mag.need_add) {
 593       if (mag.s > 0) {
 594         /* use the GM scheme */
 595         t = new_rd_Sub(dbg, current_ir_graph, block, n, q, mode);
 596
 597         c = new_r_Const(current_ir_graph, block, mode_Iu, get_mode_one(mode_Iu));
 598         t = new_rd_Shr(dbg, current_ir_graph, block, t, c, mode);
 599
 600         t = new_rd_Add(dbg, current_ir_graph, block, t, q, mode);
 601
 602         c = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(mag.s-1, mode_Iu));
 603         q = new_rd_Shr(dbg, current_ir_graph, block, t, c, mode);
 604       }
 605       else {
 606         /* use the default scheme */
 607         q = new_rd_Add(dbg, current_ir_graph, block, q, n, mode);
 608       }
 609     }
 610     else if (mag.s > 0) { /* default scheme, shift needed */
 611       c = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(mag.s, mode_Iu));
 612       q = new_rd_Shr(dbg, current_ir_graph, block, q, c, mode);
 613     }
 614   }
 615   return q;
 616 }
 617
 618 ir_node *arch_dep_replace_div_by_const(ir_node *irn)
 619 {
 620   ir_node *res  = irn;
 621
 622   /* If the architecture dependent optimizations were not initialized
 623      or this optimization was not enabled. */
 624   if (params == NULL || (opts & arch_dep_div_by_const) == 0)
 625     return irn;
 626
 627   if (get_irn_opcode(irn) == iro_Div) {
 628     ir_node *c = get_Div_right(irn);
 629     ir_node *block, *left;
 630     ir_mode *mode;
 631     tarval *tv, *ntv;
 632     dbg_info *dbg;
 633     int n, bits;
 634     int k, n_flag;
 635
 636     if (get_irn_op(c) != op_Const)
 637       return irn;
 638
 639     left  = get_Div_left(irn);
 640     mode  = get_irn_mode(left);
 641     block = get_nodes_block(irn);
 642     dbg   = get_irn_dbg_info(irn);
 643     tv    = get_Const_tarval(c);
 644
 645     bits = get_mode_size_bits(mode);
 646     n    = (bits + 7) / 8;
 647
 648     k = -1;
 649     if (mode_is_signed(mode)) {
 650       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 651       ntv = tarval_neg(tv);
 652       n_flag = 1;
 653       k = tv_ld2(ntv, n);
 654     }
 655
 656     if (k < 0) {
 657       n_flag = 0;
 658       k = tv_ld2(tv, n);
 659     }
 660
 661     if (k >= 0) { /* division by 2^k or -2^k */
 662       if (mode_is_signed(mode)) {
 663         ir_node *k_node;
 664         ir_node *curr = left;
 665
 666         if (k != 1) {
 667           k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k - 1, mode_Iu));
 668           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 669         }
 670
 671         k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(bits - k, mode_Iu));
 672         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 673
 674         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 675
 676         k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k, mode_Iu));
 677         res    = new_rd_Shrs(dbg, current_ir_graph, block, curr, k_node, mode);
 678
 679         if (n_flag) { /* negate the result */
 680           ir_node *k_node;
 681
 682           k_node = new_r_Const(current_ir_graph, block, mode, get_mode_null(mode));
 683           res = new_rd_Sub(dbg, current_ir_graph, block, k_node, res, mode);
 684         }
 685       }
 686       else {      /* unsigned case */
 687         ir_node *k_node;
 688
 689         k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k, mode_Iu));
 690         res    = new_rd_Shr(dbg, current_ir_graph, block, left, k_node, mode);
 691       }
 692     }
 693     else {
 694       /* other constant */
 695       if ((mode_is_signed(mode) && params->allow_mulhs) ||
 696           (!mode_is_signed(mode) && params->allow_mulhu))
 697         res = replace_div_by_mulh(irn, tv);
 698     }
 699   }
 700
 701   if (res != irn)
 702     stat_arch_dep_replace_div_by_const(irn);
 703
 704   return res;
 705 }
 706
 707 ir_node *arch_dep_replace_mod_by_const(ir_node *irn)
 708 {
 709   ir_node *res  = irn;
 710
 711   /* If the architecture dependent optimizations were not initialized
 712      or this optimization was not enabled. */
 713   if (params == NULL || (opts & arch_dep_mod_by_const) == 0)
 714     return irn;
 715
 716   if (get_irn_opcode(irn) == iro_Mod) {
 717     ir_node *c = get_Mod_right(irn);
 718     ir_node *block, *left;
 719     ir_mode *mode;
 720     tarval *tv, *ntv;
 721     dbg_info *dbg;
 722     int n, bits;
 723     int k;
 724
 725     if (get_irn_op(c) != op_Const)
 726       return irn;
 727
 728     left  = get_Mod_left(irn);
 729     mode  = get_irn_mode(left);
 730     block = get_nodes_block(irn);
 731     dbg   = get_irn_dbg_info(irn);
 732     tv    = get_Const_tarval(c);
 733
 734     bits = get_mode_size_bits(mode);
 735     n    = (bits + 7) / 8;
 736
 737     k = -1;
 738     if (mode_is_signed(mode)) {
 739       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 740       ntv = tarval_neg(tv);
 741       k = tv_ld2(ntv, n);
 742     }
 743
 744     if (k < 0) {
 745       k = tv_ld2(tv, n);
 746     }
 747
 748     if (k >= 0) {
 749       /* division by 2^k or -2^k:
 750        * we use "modulus" here, so x % y == x % -y that's why is no difference between the case 2^k and -2^k
 751        */
 752       if (mode_is_signed(mode)) {
 753         ir_node *k_node;
 754         ir_node *curr = left;
 755
 756         if (k != 1) {
 757           k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k - 1, mode_Iu));
 758           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 759         }
 760
 761         k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(bits - k, mode_Iu));
 762         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 763
 764         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 765
 766         k_node = new_r_Const(current_ir_graph, block, mode, new_tarval_from_long((-1) << k, mode));
 767         curr   = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
 768
 769         res    = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
 770       }
 771       else {      /* unsigned case */
 772         ir_node *k_node;
 773
 774         k_node = new_r_Const(current_ir_graph, block, mode, new_tarval_from_long((1 << k) - 1, mode));
 775         res    = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
 776       }
 777     }
 778     else {
 779       /* other constant */
 780       if ((mode_is_signed(mode) && params->allow_mulhs) ||
 781           (!mode_is_signed(mode) && params->allow_mulhu)) {
 782         res = replace_div_by_mulh(irn, tv);
 783
 784         res = new_rd_Mul(dbg, current_ir_graph, block, res, c, mode);
 785
 786         /* res = arch_dep_mul_to_shift(res); */
 787
 788         res = new_rd_Sub(dbg, current_ir_graph, block, left, res, mode);
 789       }
 790     }
 791   }
 792
 793   if (res != irn)
 794     stat_arch_dep_replace_mod_by_const(irn);
 795
 796   return res;
 797 }
 798
 799 void arch_dep_replace_divmod_by_const(ir_node **div, ir_node **mod, ir_node *irn)
 800 {
 801   *div = *mod = NULL;
 802
 803   /* If the architecture dependent optimizations were not initialized
 804      or this optimization was not enabled. */
 805   if (params == NULL ||
 806       ((opts & (arch_dep_div_by_const|arch_dep_mod_by_const)) != (arch_dep_div_by_const|arch_dep_mod_by_const)))
 807     return;
 808
 809   if (get_irn_opcode(irn) == iro_DivMod) {
 810     ir_node *c = get_DivMod_right(irn);
 811     ir_node *block, *left;
 812     ir_mode *mode;
 813     tarval *tv, *ntv;
 814     dbg_info *dbg;
 815     int n, bits;
 816     int k, n_flag;
 817
 818     if (get_irn_op(c) != op_Const)
 819       return;
 820
 821     left  = get_DivMod_left(irn);
 822     mode  = get_irn_mode(left);
 823     block = get_nodes_block(irn);
 824     dbg   = get_irn_dbg_info(irn);
 825     tv    = get_Const_tarval(c);
 826
 827     bits = get_mode_size_bits(mode);
 828     n    = (bits + 7) / 8;
 829
 830     k = -1;
 831     if (mode_is_signed(mode)) {
 832       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 833       ntv = tarval_neg(tv);
 834       n_flag = 1;
 835       k = tv_ld2(ntv, n);
 836     }
 837
 838     if (k < 0) {
 839       n_flag = 0;
 840       k = tv_ld2(tv, n);
 841     }
 842
 843     if (k >= 0) { /* division by 2^k or -2^k */
 844       if (mode_is_signed(mode)) {
 845         ir_node *k_node, *c_k;
 846         ir_node *curr = left;
 847
 848         if (k != 1) {
 849           k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k - 1, mode_Iu));
 850           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 851         }
 852
 853         k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(bits - k, mode_Iu));
 854         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 855
 856         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 857
 858         c_k    = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k, mode_Iu));
 859
 860         *div   = new_rd_Shrs(dbg, current_ir_graph, block, curr, c_k, mode);
 861
 862         if (n_flag) { /* negate the div result */
 863           ir_node *k_node;
 864
 865           k_node = new_r_Const(current_ir_graph, block, mode, get_mode_null(mode));
 866           *div = new_rd_Sub(dbg, current_ir_graph, block, k_node, *div, mode);
 867         }
 868
 869         k_node = new_r_Const(current_ir_graph, block, mode, new_tarval_from_long((-1) << k, mode));
 870         curr   = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
 871
 872         *mod   = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
 873       }
 874       else {      /* unsigned case */
 875         ir_node *k_node;
 876
 877         k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k, mode_Iu));
 878         *div   = new_rd_Shr(dbg, current_ir_graph, block, left, k_node, mode);
 879
 880         k_node = new_r_Const(current_ir_graph, block, mode, new_tarval_from_long((1 << k) - 1, mode));
 881         *mod   = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
 882       }
 883     }
 884     else {
 885       /* other constant */
 886       if ((mode_is_signed(mode) && params->allow_mulhs) ||
 887           (!mode_is_signed(mode) && params->allow_mulhu)) {
 888         ir_node *t;
 889
 890         *div = replace_div_by_mulh(irn, tv);
 891
 892         t    = new_rd_Mul(dbg, current_ir_graph, block, *div, c, mode);
 893
 894         /* t = arch_dep_mul_to_shift(t); */
 895
 896         *mod = new_rd_Sub(dbg, current_ir_graph, block, left, t, mode);
 897       }
 898     }
 899   }
 900
 901   if (*div)
 902     stat_arch_dep_replace_DivMod_by_const(irn);
 903 }
 904
 905
 906 static const arch_dep_params_t default_params = {
 907   1,  /* also use subs */
 908   4,  /* maximum shifts */
 909   31, /* maximum shift amount */
 910
 911   0,  /* allow Mulhs */
 912   0,  /* allow Mulus */
 913   32  /* Mulh allowed up to 32 bit */
 914 };
 915
 916 const arch_dep_params_t *arch_dep_default_factory(void) {
 917   return &default_params;
 918 }