nsz Git - libfirm/blob - ir/ir/irarch.c

   1 /**
   2  * @file irarch.c
   3  * @date 28.9.2004
   4  * @author Sebastian Hack
   5  * @brief Machine dependent firm optimizations.
   6  *
   7  * $Id$
   8  */
   9 #include <stdlib.h>
  10 #include <assert.h>
  11
  12 #include "irnode_t.h"
  13 #include "irgraph_t.h"
  14 #include "irmode_t.h"
  15 #include "iropt_t.h"
  16 #include "ircons_t.h"
  17 #include "irgmod.h"
  18 #include "irvrfy.h"
  19 #include "tv.h"
  20 #include "dbginfo_t.h"
  21 #include "iropt_dbg.h"
  22 #include "irflag_t.h"
  23 #include "irhooks.h"
  24 #include "ircons.h"
  25 #include "irarch.h"
  26
  27 #undef DEB
  28
  29 #define MAX_BITSTR 64
  30
  31 /* when we need verifying */
  32 #ifdef NDEBUG
  33 # define IRN_VRFY_IRG(res, irg)
  34 #else
  35 # define IRN_VRFY_IRG(res, irg)  irn_vrfy_irg(res, irg)
  36 #endif
  37
  38 /** The params got from the factory in arch_dep_init(...). */
  39 static const arch_dep_params_t *params = NULL;
  40
  41 /** The bit mask, which optimizations to apply. */
  42 static arch_dep_opts_t opts;
  43
  44 /* we need this new pseudo op */
  45 static ir_op *op_Mulh = NULL;
  46
  47 /**
  48  * construct a Mulh: Mulh(a,b) = (a * b) >> w, w is the with in bits of a, b
  49  */
  50 static ir_node *
  51 new_rd_Mulh (dbg_info *db, ir_graph *irg, ir_node *block,
  52        ir_node *op1, ir_node *op2, ir_mode *mode)
  53 {
  54   ir_node *in[2];
  55   ir_node *res;
  56
  57   if (! op_Mulh) {
  58     op_Mulh = new_ir_op(get_next_ir_opcode(), "Mulh",  op_pin_state_floats, irop_flag_commutative, oparity_binary, 0, 0);
  59   }
  60
  61   in[0] = op1;
  62   in[1] = op2;
  63   res = new_ir_node(db, irg, block, op_Mulh, mode, 2, in);
  64   res = optimize_node(res);
  65   IRN_VRFY_IRG(res, irg);
  66   return res;
  67 }
  68
  69 ir_op *get_op_Mulh(void)  { return op_Mulh; }
  70
  71 void arch_dep_init(arch_dep_params_factory_t factory)
  72 {
  73   opts = arch_dep_none;
  74
  75   if (factory != NULL)
  76     params = factory();
  77
  78   if (params && (opts & (arch_dep_div_by_const|arch_dep_mod_by_const))) {
  79     if (! op_Mulh) {
  80       /* create the Mulh operation */
  81       op_Mulh = new_ir_op(get_next_ir_opcode(), "Mulh",  op_pin_state_floats, irop_flag_commutative, oparity_binary, 0, 0);
  82     }
  83   }
  84 }
  85
  86 void arch_dep_set_opts(arch_dep_opts_t the_opts) {
  87   opts = the_opts;
  88 }
  89
  90 /* check, wheater a mode allows a Mulh instruction */
  91 static int allow_Mulh(ir_mode *mode)
  92 {
  93   if (get_mode_size_bits(mode) > params->max_bits_for_mulh)
  94     return 0;
  95   return (mode_is_signed(mode) && params->allow_mulhs) || (!mode_is_signed(mode) && params->allow_mulhu);
  96 }
  97
  98 ir_node *arch_dep_replace_mul_with_shifts(ir_node *irn)
  99 {
 100   ir_node *res = irn;
 101   ir_mode *mode = get_irn_mode(irn);
 102
 103   /* If the architecture dependent optimizations were not initialized
 104      or this optimization was not enabled. */
 105   if(params == NULL || (opts & arch_dep_mul_to_shift) == 0)
 106     return irn;
 107
 108   if(get_irn_opcode(irn) == iro_Mul && mode_is_int(mode)) {
 109     ir_node *block   = get_nodes_block(irn);
 110     ir_node *left    = get_binop_left(irn);
 111     ir_node *right   = get_binop_right(irn);
 112     tarval *tv       = NULL;
 113     ir_node *operand = NULL;
 114
 115     /* Look, if one operand is a constant. */
 116     if(get_irn_opcode(left) == iro_Const) {
 117       tv = get_Const_tarval(left);
 118       operand = right;
 119     } else if(get_irn_opcode(right) == iro_Const) {
 120       tv = get_Const_tarval(right);
 121       operand = left;
 122     }
 123
 124     if(tv != NULL) {
 125       int maximum_shifts = params->maximum_shifts;
 126       int also_use_subs = params->also_use_subs;
 127       int highest_shift_amount = params->highest_shift_amount;
 128
 129       char *bitstr = get_tarval_bitpattern(tv);
 130       char *p;
 131       int i, last = 0;
 132       int counter = 0;
 133       int curr_bit;
 134       int compr_len = 0;
 135       char compr[MAX_BITSTR];
 136
 137       int singleton;
 138       int end_of_group;
 139       int shift_with_sub[MAX_BITSTR] = { 0 };
 140       int shift_without_sub[MAX_BITSTR] = { 0 };
 141       int shift_with_sub_pos = 0;
 142       int shift_without_sub_pos = 0;
 143
 144 #if DEB
 145       {
 146         long val = get_tarval_long(tv);
 147         fprintf(stderr, "Found mul with %ld(%lx) = ", val, val);
 148         for(p = bitstr; *p != '\0'; p++)
 149           printf("%c", *p);
 150         printf("\n");
 151       }
 152 #endif
 153
 154       for(p = bitstr; *p != '\0'; p++) {
 155         int bit = *p != '0';
 156
 157         if (bit != last) {
 158           /* The last was 1 we are now at 0 OR
 159            * The last was 0 and we are now at 1 */
 160           compr[compr_len++] = counter;
 161           counter = 1;
 162         }
 163         else
 164           counter++;
 165
 166         last = bit;
 167       }
 168       compr[compr_len++] = counter;
 169
 170
 171 #ifdef DEB
 172       {
 173         const char *prefix = "";
 174         for(i = 0; i < compr_len; i++, prefix = ",")
 175           fprintf(stderr, "%s%d", prefix, compr[i]);
 176         fprintf("\n");
 177       }
 178 #endif
 179
 180       // Go over all recorded one groups.
 181       curr_bit = compr[0];
 182
 183       for(i = 1; i < compr_len; i = end_of_group + 2) {
 184         int j, zeros_in_group, ones_in_group;
 185
 186         ones_in_group = compr[i];
 187         zeros_in_group = 0;
 188
 189         // Scan for singular 0s in a sequence
 190         for(j = i + 1; j < compr_len && compr[j] == 1; j += 2) {
 191           zeros_in_group += 1;
 192           ones_in_group += (j + 1 < compr_len ? compr[j + 1] : 0);
 193         }
 194         end_of_group = j - 1;
 195
 196         if(zeros_in_group >= ones_in_group - 1)
 197           end_of_group = i;
 198
 199 #ifdef DEB
 200         fprintf(stderr, "  i:%d, eg:%d\n", i, end_of_group);
 201 #endif
 202
 203         singleton = compr[i] == 1 && i == end_of_group;
 204         for(j = i; j <= end_of_group; j += 2) {
 205           int curr_ones = compr[j];
 206           int biased_curr_bit = curr_bit + 1;
 207           int k;
 208
 209 #ifdef DEB
 210           fprintf(stderr, "    j:%d, ones:%d\n", j, curr_ones);
 211 #endif
 212
 213           // If this ones group is a singleton group (it has no
 214           // singleton zeros inside
 215           if(singleton)
 216             shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
 217           else if(j == i)
 218             shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
 219
 220           for(k = 0; k < curr_ones; k++)
 221             shift_without_sub[shift_without_sub_pos++] = biased_curr_bit + k;
 222
 223           curr_bit += curr_ones;
 224           biased_curr_bit = curr_bit + 1;
 225
 226           if(!singleton && j == end_of_group)
 227             shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
 228           else if(j != end_of_group)
 229             shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
 230
 231           curr_bit += compr[j + 1];
 232         }
 233
 234       }
 235
 236       {
 237         int *shifts = shift_with_sub;
 238         int n = shift_with_sub_pos;
 239         int highest_shift_wide = 0;
 240         int highest_shift_seq = 0;
 241         int last_shift = 0;
 242
 243         /* If we may not use subs, or we can achive the same with adds,
 244            prefer adds. */
 245         if(!also_use_subs || shift_with_sub_pos >= shift_without_sub_pos) {
 246           shifts = shift_without_sub;
 247           n = shift_without_sub_pos;
 248         }
 249
 250         /* If the number of needed shifts exceeds the given maximum,
 251            use the Mul and exit. */
 252         if(n > maximum_shifts) {
 253 #ifdef DEB
 254           fprintf(stderr, "Only allowed %d shifts, but %d are needed\n",
 255               maximum_shifts, n);
 256 #endif
 257           return irn;
 258         }
 259
 260         /* Compute the highest shift needed for both, the
 261            sequential and wide representations. */
 262         for(i = 0; i < n; i++) {
 263           int curr = abs(shifts[i]);
 264           int curr_seq = curr - last;
 265
 266           highest_shift_wide = curr > highest_shift_wide ? curr
 267             : highest_shift_wide;
 268           highest_shift_seq = curr_seq > highest_shift_seq ? curr_seq
 269             : highest_shift_seq;
 270
 271           last_shift = curr;
 272         }
 273
 274         /* If the highest shift amount is greater than the given limit,
 275            give back the Mul */
 276         if(highest_shift_seq > highest_shift_amount) {
 277 #ifdef DEB
 278           fprintf(stderr, "Shift argument %d exceeds maximum %d\n",
 279               highest_shift_seq, highest_shift_amount);
 280 #endif
 281           return irn;
 282         }
 283
 284         /* If we have subs, we cannot do sequential. */
 285         if(1 /* also_use_subs */) {
 286           if(n > 0) {
 287             ir_node *curr = NULL;
 288
 289             i = n - 1;
 290
 291             do {
 292               int curr_shift = shifts[i];
 293               int sub = curr_shift < 0;
 294               int amount = abs(curr_shift) - 1;
 295               ir_node *aux = operand;
 296
 297
 298               assert(amount >= 0 && "What is a negative shift??");
 299
 300               if(amount != 0) {
 301                 tarval *shift_amount = new_tarval_from_long(amount, mode_Iu);
 302                 ir_node *cnst = new_r_Const(current_ir_graph, block, mode_Iu, shift_amount);
 303                 aux = new_r_Shl(current_ir_graph, block, operand, cnst, mode);
 304               }
 305
 306               if(curr) {
 307                 if(sub)
 308                   curr = new_r_Sub(current_ir_graph, block, curr, aux, mode);
 309                 else
 310                   curr = new_r_Add(current_ir_graph, block, curr, aux, mode);
 311               } else
 312                 curr = aux;
 313
 314             } while(--i >= 0);
 315
 316             res = curr;
 317           }
 318         }
 319
 320 #ifdef DEB
 321         {
 322           const char *prefix = "";
 323           for(i = 0; i < n; i++) {
 324             fprintf(stderr, "%s%d", prefix, shifts[i]);
 325             prefix = ", ";
 326           }
 327           fprintf(stderr, "\n");
 328         }
 329 #endif
 330
 331       }
 332
 333       if(bitstr)
 334         free(bitstr);
 335     }
 336
 337   }
 338
 339   if (res != irn)
 340     hook_arch_dep_replace_mul_with_shifts(irn);
 341
 342   return res;
 343 }
 344
 345 /**
 346  * calculated the ld2 of a tarval if tarval is 2^n, else returns -1.
 347  */
 348 static int tv_ld2(tarval *tv, int bits)
 349 {
 350   int i, k, num;
 351
 352   for (num = i = 0; i < bits; ++i) {
 353     unsigned char v = get_tarval_sub_bits(tv, i);
 354
 355     if (v) {
 356       int j;
 357
 358       for (j = 0; j < 8; ++j)
 359         if ((1 << j) & v) {
 360           ++num;
 361           k = 8 * i + j;
 362         }
 363     }
 364   }
 365   if (num == 1)
 366     return k;
 367   return -1;
 368 }
 369
 370
 371 /* for shorter lines */
 372 #define ABS(a)    tarval_abs(a)
 373 #define NEG(a)    tarval_neg(a)
 374 #define NOT(a)    tarval_not(a)
 375 #define SHL(a, b) tarval_shl(a, b)
 376 #define SHR(a, b) tarval_shr(a, b)
 377 #define ADD(a, b) tarval_add(a, b)
 378 #define SUB(a, b) tarval_sub(a, b)
 379 #define MUL(a, b) tarval_mul(a, b)
 380 #define DIV(a, b) tarval_div(a, b)
 381 #define MOD(a, b) tarval_mod(a, b)
 382 #define CMP(a, b) tarval_cmp(a, b)
 383 #define CNV(a, m) tarval_convert_to(a, m)
 384 #define ONE(m)    get_mode_one(m)
 385 #define ZERO(m)   get_mode_null(m)
 386
 387 struct ms {
 388   tarval *M;        /**< magic number */
 389   int s;            /**< shift amount */
 390   int need_add;     /**< an additional add is needed */
 391   int need_sub;     /**< an additional sub is needed */
 392 };
 393
 394 /**
 395  * Signed division by constant d: calculate the Magic multiplier M and the shift amount s
 396  *
 397  * see Hacker's Delight: 10-6 Integer Division by Constants: Incorporation into a Compiler
 398  */
 399 static struct ms magic(tarval *d)
 400 {
 401   ir_mode *mode   = get_tarval_mode(d);
 402   ir_mode *u_mode = find_unsigned_mode(mode);
 403   int bits        = get_mode_size_bits(u_mode);
 404   int p;
 405   tarval *ad, *anc, *delta, *q1, *r1, *q2, *r2, *t;     /* unsigned */
 406   pnc_number d_cmp, M_cmp;
 407
 408   tarval *bits_minus_1, *two_bits_1;
 409
 410   struct ms mag;
 411
 412   tarval_int_overflow_mode_t rem = tarval_get_integer_overflow_mode();
 413
 414   /* we need overflow mode to work correctly */
 415   tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
 416
 417   /* 2^(bits-1) */
 418   bits_minus_1 = new_tarval_from_long(bits - 1, u_mode);
 419   two_bits_1   = SHL(get_mode_one(u_mode), bits_minus_1);
 420
 421   ad  = CNV(ABS(d), u_mode);
 422   t   = ADD(two_bits_1, SHR(CNV(d, u_mode), bits_minus_1));
 423   anc = SUB(SUB(t, ONE(u_mode)), MOD(t, ad));   /* Absolute value of nc */
 424   p   = bits - 1;                               /* Init: p */
 425   q1  = DIV(two_bits_1, anc);                   /* Init: q1 = 2^p/|nc| */
 426   r1  = SUB(two_bits_1, MUL(q1, anc));          /* Init: r1 = rem(2^p, |nc|) */
 427   q2  = DIV(two_bits_1, ad);                    /* Init: q2 = 2^p/|d| */
 428   r2  = SUB(two_bits_1, MUL(q2, ad));           /* Init: r2 = rem(2^p, |d|) */
 429
 430   do {
 431     ++p;
 432     q1 = ADD(q1, q1);                           /* Update q1 = 2^p/|nc| */
 433     r1 = ADD(r1, r1);                           /* Update r1 = rem(2^p, |nc|) */
 434
 435     if (CMP(r1, anc) & Ge) {
 436       q1 = ADD(q1, ONE(u_mode));
 437       r1 = SUB(r1, anc);
 438     }
 439
 440     q2 = ADD(q2, q2);                           /* Update q2 = 2^p/|d| */
 441     r2 = ADD(r2, r2);                           /* Update r2 = rem(2^p, |d|) */
 442
 443     if (CMP(r2, ad) & Ge) {
 444       q2 = ADD(q2, ONE(u_mode));
 445       r2 = SUB(r2, ad);
 446     }
 447
 448     delta = SUB(ad, r2);
 449   } while (CMP(q1, delta) & Lt || (CMP(q1, delta) & Eq && CMP(r1, ZERO(u_mode)) & Eq));
 450
 451   d_cmp = CMP(d, ZERO(mode));
 452
 453   if (d_cmp & Ge)
 454     mag.M = ADD(CNV(q2, mode), ONE(mode));
 455   else
 456     mag.M = SUB(ZERO(mode), ADD(CNV(q2, mode), ONE(mode)));
 457
 458   M_cmp = CMP(mag.M, ZERO(mode));
 459
 460   mag.s = p - bits;
 461
 462   /* need an add if d > 0 && M < 0 */
 463   mag.need_add = d_cmp & Gt && M_cmp & Lt;
 464
 465   /* need a sub if d < 0 && M > 0 */
 466   mag.need_sub = d_cmp & Lt && M_cmp & Gt;
 467
 468   tarval_set_integer_overflow_mode(rem);
 469
 470   return mag;
 471 }
 472
 473 struct mu {
 474   tarval *M;        /**< magic add constant */
 475   int s;            /**< shift amount */
 476   int need_add;     /**< add indicator */
 477 };
 478
 479 /**
 480  * Unsigned division by constant d: calculate the Magic multiplier M and the shift amount s
 481  *
 482  * see Hacker's Delight: 10-10 Integer Division by Constants: Incorporation into a Compiler (Unsigned)
 483  */
 484 static struct mu magicu(tarval *d)
 485 {
 486   ir_mode *mode   = get_tarval_mode(d);
 487   int bits        = get_mode_size_bits(mode);
 488   int p;
 489   tarval *nc, *delta, *q1, *r1, *q2, *r2;
 490   tarval *bits_minus_1, *two_bits_1, *seven_ff;
 491
 492   struct mu magu;
 493
 494   tarval_int_overflow_mode_t rem = tarval_get_integer_overflow_mode();
 495
 496   /* we need overflow mode to work correctly */
 497   tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
 498
 499   bits_minus_1 = new_tarval_from_long(bits - 1, mode);
 500   two_bits_1   = SHL(get_mode_one(mode), bits_minus_1);
 501   seven_ff     = SUB(two_bits_1, ONE(mode));
 502
 503   magu.need_add = 0;                            /* initialize the add indicator */
 504   nc = SUB(NEG(ONE(mode)), MOD(NEG(d), d));
 505   p  = bits - 1;                                /* Init: p */
 506   q1 = DIV(two_bits_1, nc);                     /* Init: q1 = 2^p/nc */
 507   r1 = SUB(two_bits_1, MUL(q1, nc));            /* Init: r1 = rem(2^p, nc) */
 508   q2 = DIV(seven_ff, d);                        /* Init: q2 = (2^p - 1)/d */
 509   r2 = SUB(seven_ff, MUL(q2, d));               /* Init: r2 = rem(2^p - 1, d) */
 510
 511   do {
 512     ++p;
 513     if (CMP(r1, SUB(nc, r1)) & Ge) {
 514       q1 = ADD(ADD(q1, q1), ONE(mode));
 515       r1 = SUB(ADD(r1, r1), nc);
 516     }
 517     else {
 518       q1 = ADD(q1, q1);
 519       r1 = ADD(r1, r1);
 520     }
 521
 522     if (CMP(ADD(r2, ONE(mode)), SUB(d, r2)) & Ge) {
 523       if (CMP(q2, seven_ff) & Ge)
 524         magu.need_add = 1;
 525
 526       q2 = ADD(ADD(q2, q2), ONE(mode));
 527       r2 = SUB(ADD(ADD(r2, r2), ONE(mode)), d);
 528     }
 529     else {
 530       if (CMP(q2, two_bits_1) & Ge)
 531         magu.need_add = 1;
 532
 533       q2 = ADD(q2, q2);
 534       r2 = ADD(ADD(r2, r2), ONE(mode));
 535     }
 536     delta = SUB(SUB(d, ONE(mode)), r2);
 537   } while (p < 2*bits &&
 538           (CMP(q1, delta) & Lt || (CMP(q1, delta) & Eq && CMP(r1, ZERO(mode)) & Eq)));
 539
 540   magu.M = ADD(q2, ONE(mode));                       /* Magic number */
 541   magu.s = p - bits;                                 /* and shift amount */
 542
 543   tarval_set_integer_overflow_mode(rem);
 544
 545   return magu;
 546 }
 547
 548 /**
 549  * build the Mulh replacement code for n / tv
 550  *
 551  * Note thet 'div' might be a mod or DivMod operation as well
 552  */
 553 static ir_node *replace_div_by_mulh(ir_node *div, tarval *tv)
 554 {
 555   dbg_info *dbg  = get_irn_dbg_info(div);
 556   ir_node *n     = get_binop_left(div);
 557   ir_node *block = get_nodes_block(div);
 558   ir_mode *mode  = get_irn_mode(n);
 559   int bits       = get_mode_size_bits(mode);
 560   ir_node *q, *t, *c;
 561
 562   /* Beware: do not transform bad code */
 563   if (is_Bad(n) || is_Bad(block))
 564     return div;
 565
 566   if (mode_is_signed(mode)) {
 567     struct ms mag = magic(tv);
 568
 569     /* generate the Mulh instruction */
 570     c = new_r_Const(current_ir_graph, block, mode, mag.M);
 571     q = new_rd_Mulh(dbg, current_ir_graph, block, n, c, mode);
 572
 573     /* do we need an Add or Sub */
 574     if (mag.need_add)
 575       q = new_rd_Add(dbg, current_ir_graph, block, q, n, mode);
 576     else if (mag.need_sub)
 577       q = new_rd_Sub(dbg, current_ir_graph, block, q, n, mode);
 578
 579     /* Do we need the shift */
 580     if (mag.s > 0) {
 581       c = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(mag.s, mode_Iu));
 582       q    = new_rd_Shrs(dbg, current_ir_graph, block, q, c, mode);
 583     }
 584
 585     /* final */
 586     c = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(bits-1, mode_Iu));
 587     t = new_rd_Shr(dbg, current_ir_graph, block, q, c, mode);
 588
 589     q = new_rd_Add(dbg, current_ir_graph, block, q, t, mode);
 590   }
 591   else {
 592     struct mu mag = magicu(tv);
 593     ir_node *c;
 594
 595     /* generate the Mulh instruction */
 596     c = new_r_Const(current_ir_graph, block, mode, mag.M);
 597     q    = new_rd_Mulh(dbg, current_ir_graph, block, n, c, mode);
 598
 599     if (mag.need_add) {
 600       if (mag.s > 0) {
 601         /* use the GM scheme */
 602         t = new_rd_Sub(dbg, current_ir_graph, block, n, q, mode);
 603
 604         c = new_r_Const(current_ir_graph, block, mode_Iu, get_mode_one(mode_Iu));
 605         t = new_rd_Shr(dbg, current_ir_graph, block, t, c, mode);
 606
 607         t = new_rd_Add(dbg, current_ir_graph, block, t, q, mode);
 608
 609         c = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(mag.s-1, mode_Iu));
 610         q = new_rd_Shr(dbg, current_ir_graph, block, t, c, mode);
 611       }
 612       else {
 613         /* use the default scheme */
 614         q = new_rd_Add(dbg, current_ir_graph, block, q, n, mode);
 615       }
 616     }
 617     else if (mag.s > 0) { /* default scheme, shift needed */
 618       c = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(mag.s, mode_Iu));
 619       q = new_rd_Shr(dbg, current_ir_graph, block, q, c, mode);
 620     }
 621   }
 622   return q;
 623 }
 624
 625 ir_node *arch_dep_replace_div_by_const(ir_node *irn)
 626 {
 627   ir_node *res  = irn;
 628
 629   /* If the architecture dependent optimizations were not initialized
 630      or this optimization was not enabled. */
 631   if (params == NULL || (opts & arch_dep_div_by_const) == 0)
 632     return irn;
 633
 634   if (get_irn_opcode(irn) == iro_Div) {
 635     ir_node *c = get_Div_right(irn);
 636     ir_node *block, *left;
 637     ir_mode *mode;
 638     tarval *tv, *ntv;
 639     dbg_info *dbg;
 640     int n, bits;
 641     int k, n_flag;
 642
 643     if (get_irn_op(c) != op_Const)
 644       return irn;
 645
 646     left  = get_Div_left(irn);
 647     mode  = get_irn_mode(left);
 648     block = get_nodes_block(irn);
 649     dbg   = get_irn_dbg_info(irn);
 650     tv    = get_Const_tarval(c);
 651
 652     bits = get_mode_size_bits(mode);
 653     n    = (bits + 7) / 8;
 654
 655     k = -1;
 656     if (mode_is_signed(mode)) {
 657       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 658       ntv = tarval_neg(tv);
 659       n_flag = 1;
 660       k = tv_ld2(ntv, n);
 661     }
 662
 663     if (k < 0) {
 664       n_flag = 0;
 665       k = tv_ld2(tv, n);
 666     }
 667
 668     if (k >= 0) { /* division by 2^k or -2^k */
 669       if (mode_is_signed(mode)) {
 670         ir_node *k_node;
 671         ir_node *curr = left;
 672
 673         if (k != 1) {
 674           k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k - 1, mode_Iu));
 675           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 676         }
 677
 678         k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(bits - k, mode_Iu));
 679         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 680
 681         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 682
 683         k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k, mode_Iu));
 684         res    = new_rd_Shrs(dbg, current_ir_graph, block, curr, k_node, mode);
 685
 686         if (n_flag) { /* negate the result */
 687           ir_node *k_node;
 688
 689           k_node = new_r_Const(current_ir_graph, block, mode, get_mode_null(mode));
 690           res = new_rd_Sub(dbg, current_ir_graph, block, k_node, res, mode);
 691         }
 692       }
 693       else {      /* unsigned case */
 694         ir_node *k_node;
 695
 696         k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k, mode_Iu));
 697         res    = new_rd_Shr(dbg, current_ir_graph, block, left, k_node, mode);
 698       }
 699     }
 700     else {
 701       /* other constant */
 702       if (allow_Mulh(mode))
 703         res = replace_div_by_mulh(irn, tv);
 704     }
 705   }
 706
 707   if (res != irn)
 708     hook_arch_dep_replace_div_by_const(irn);
 709
 710   return res;
 711 }
 712
 713 ir_node *arch_dep_replace_mod_by_const(ir_node *irn)
 714 {
 715   ir_node *res  = irn;
 716
 717   /* If the architecture dependent optimizations were not initialized
 718      or this optimization was not enabled. */
 719   if (params == NULL || (opts & arch_dep_mod_by_const) == 0)
 720     return irn;
 721
 722   if (get_irn_opcode(irn) == iro_Mod) {
 723     ir_node *c = get_Mod_right(irn);
 724     ir_node *block, *left;
 725     ir_mode *mode;
 726     tarval *tv, *ntv;
 727     dbg_info *dbg;
 728     int n, bits;
 729     int k;
 730
 731     if (get_irn_op(c) != op_Const)
 732       return irn;
 733
 734     left  = get_Mod_left(irn);
 735     mode  = get_irn_mode(left);
 736     block = get_nodes_block(irn);
 737     dbg   = get_irn_dbg_info(irn);
 738     tv    = get_Const_tarval(c);
 739
 740     bits = get_mode_size_bits(mode);
 741     n    = (bits + 7) / 8;
 742
 743     k = -1;
 744     if (mode_is_signed(mode)) {
 745       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 746       ntv = tarval_neg(tv);
 747       k = tv_ld2(ntv, n);
 748     }
 749
 750     if (k < 0) {
 751       k = tv_ld2(tv, n);
 752     }
 753
 754     if (k >= 0) {
 755       /* division by 2^k or -2^k:
 756        * we use "modulus" here, so x % y == x % -y that's why is no difference between the case 2^k and -2^k
 757        */
 758       if (mode_is_signed(mode)) {
 759         ir_node *k_node;
 760         ir_node *curr = left;
 761
 762         if (k != 1) {
 763           k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k - 1, mode_Iu));
 764           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 765         }
 766
 767         k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(bits - k, mode_Iu));
 768         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 769
 770         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 771
 772         k_node = new_r_Const(current_ir_graph, block, mode, new_tarval_from_long((-1) << k, mode));
 773         curr   = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
 774
 775         res    = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
 776       }
 777       else {      /* unsigned case */
 778         ir_node *k_node;
 779
 780         k_node = new_r_Const(current_ir_graph, block, mode, new_tarval_from_long((1 << k) - 1, mode));
 781         res    = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
 782       }
 783     }
 784     else {
 785       /* other constant */
 786       if (allow_Mulh(mode)) {
 787         res = replace_div_by_mulh(irn, tv);
 788
 789         res = new_rd_Mul(dbg, current_ir_graph, block, res, c, mode);
 790
 791         /* res = arch_dep_mul_to_shift(res); */
 792
 793         res = new_rd_Sub(dbg, current_ir_graph, block, left, res, mode);
 794       }
 795     }
 796   }
 797
 798   if (res != irn)
 799     hook_arch_dep_replace_mod_by_const(irn);
 800
 801   return res;
 802 }
 803
 804 void arch_dep_replace_divmod_by_const(ir_node **div, ir_node **mod, ir_node *irn)
 805 {
 806   *div = *mod = NULL;
 807
 808   /* If the architecture dependent optimizations were not initialized
 809      or this optimization was not enabled. */
 810   if (params == NULL ||
 811       ((opts & (arch_dep_div_by_const|arch_dep_mod_by_const)) != (arch_dep_div_by_const|arch_dep_mod_by_const)))
 812     return;
 813
 814   if (get_irn_opcode(irn) == iro_DivMod) {
 815     ir_node *c = get_DivMod_right(irn);
 816     ir_node *block, *left;
 817     ir_mode *mode;
 818     tarval *tv, *ntv;
 819     dbg_info *dbg;
 820     int n, bits;
 821     int k, n_flag;
 822
 823     if (get_irn_op(c) != op_Const)
 824       return;
 825
 826     left  = get_DivMod_left(irn);
 827     mode  = get_irn_mode(left);
 828     block = get_nodes_block(irn);
 829     dbg   = get_irn_dbg_info(irn);
 830     tv    = get_Const_tarval(c);
 831
 832     bits = get_mode_size_bits(mode);
 833     n    = (bits + 7) / 8;
 834
 835     k = -1;
 836     if (mode_is_signed(mode)) {
 837       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 838       ntv = tarval_neg(tv);
 839       n_flag = 1;
 840       k = tv_ld2(ntv, n);
 841     }
 842
 843     if (k < 0) {
 844       n_flag = 0;
 845       k = tv_ld2(tv, n);
 846     }
 847
 848     if (k >= 0) { /* division by 2^k or -2^k */
 849       if (mode_is_signed(mode)) {
 850         ir_node *k_node, *c_k;
 851         ir_node *curr = left;
 852
 853         if (k != 1) {
 854           k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k - 1, mode_Iu));
 855           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 856         }
 857
 858         k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(bits - k, mode_Iu));
 859         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 860
 861         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 862
 863         c_k    = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k, mode_Iu));
 864
 865         *div   = new_rd_Shrs(dbg, current_ir_graph, block, curr, c_k, mode);
 866
 867         if (n_flag) { /* negate the div result */
 868           ir_node *k_node;
 869
 870           k_node = new_r_Const(current_ir_graph, block, mode, get_mode_null(mode));
 871           *div = new_rd_Sub(dbg, current_ir_graph, block, k_node, *div, mode);
 872         }
 873
 874         k_node = new_r_Const(current_ir_graph, block, mode, new_tarval_from_long((-1) << k, mode));
 875         curr   = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
 876
 877         *mod   = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
 878       }
 879       else {      /* unsigned case */
 880         ir_node *k_node;
 881
 882         k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k, mode_Iu));
 883         *div   = new_rd_Shr(dbg, current_ir_graph, block, left, k_node, mode);
 884
 885         k_node = new_r_Const(current_ir_graph, block, mode, new_tarval_from_long((1 << k) - 1, mode));
 886         *mod   = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
 887       }
 888     }
 889     else {
 890       /* other constant */
 891       if (allow_Mulh(mode)) {
 892         ir_node *t;
 893
 894         *div = replace_div_by_mulh(irn, tv);
 895
 896         t    = new_rd_Mul(dbg, current_ir_graph, block, *div, c, mode);
 897
 898         /* t = arch_dep_mul_to_shift(t); */
 899
 900         *mod = new_rd_Sub(dbg, current_ir_graph, block, left, t, mode);
 901       }
 902     }
 903   }
 904
 905   if (*div)
 906     hook_arch_dep_replace_DivMod_by_const(irn);
 907 }
 908
 909
 910 static const arch_dep_params_t default_params = {
 911   1,  /* also use subs */
 912   4,  /* maximum shifts */
 913   31, /* maximum shift amount */
 914
 915   0,  /* allow Mulhs */
 916   0,  /* allow Mulus */
 917   32  /* Mulh allowed up to 32 bit */
 918 };
 919
 920 const arch_dep_params_t *arch_dep_default_factory(void) {
 921   return &default_params;
 922 }