nsz Git - libfirm/blob - ir/ir/irarch.c

   1 /**
   2  * @file irarch.c
   3  * @date 28.9.2004
   4  * @author Sebastian Hack, Michael Beck
   5  * @brief Machine dependent firm optimizations.
   6  *
   7  * $Id$
   8  */
   9 #ifdef HAVE_CONFIG_H
  10 # include "config.h"
  11 #endif
  12
  13 #ifdef HAVE_STDLIB_H
  14 # include <stdlib.h>
  15 #endif
  16
  17 #include <assert.h>
  18
  19 #include "irnode_t.h"
  20 #include "irgraph_t.h"
  21 #include "irmode_t.h"
  22 #include "iropt_t.h"
  23 #include "ircons_t.h"
  24 #include "irgmod.h"
  25 #include "irvrfy.h"
  26 #include "tv_t.h"
  27 #include "dbginfo_t.h"
  28 #include "iropt_dbg.h"
  29 #include "irflag_t.h"
  30 #include "irhooks.h"
  31 #include "ircons.h"
  32 #include "irarch.h"
  33 #include "irreflect.h"
  34
  35 #undef DEB
  36
  37 #define MAX_BITSTR 64
  38
  39 /* when we need verifying */
  40 #ifdef NDEBUG
  41 # define IRN_VRFY_IRG(res, irg)
  42 #else
  43 # define IRN_VRFY_IRG(res, irg)  irn_vrfy_irg(res, irg)
  44 #endif
  45
  46 /** The params got from the factory in arch_dep_init(...). */
  47 static const arch_dep_params_t *params = NULL;
  48
  49 /** The bit mask, which optimizations to apply. */
  50 static arch_dep_opts_t opts;
  51
  52 /* we need this new pseudo op */
  53 static ir_op *op_Mulh = NULL;
  54
  55 /**
  56  * construct a Mulh: Mulh(a,b) = (a * b) >> w, w is the with in bits of a, b
  57  */
  58 static ir_node *
  59 new_rd_Mulh (dbg_info *db, ir_graph *irg, ir_node *block,
  60        ir_node *op1, ir_node *op2, ir_mode *mode)
  61 {
  62   ir_node *in[2];
  63   ir_node *res;
  64
  65   in[0] = op1;
  66   in[1] = op2;
  67   res = new_ir_node(db, irg, block, op_Mulh, mode, 2, in);
  68   res = optimize_node(res);
  69   IRN_VRFY_IRG(res, irg);
  70   return res;
  71 }
  72
  73 ir_op *get_op_Mulh(void)  { return op_Mulh; }
  74
  75 void arch_dep_init(arch_dep_params_factory_t factory)
  76 {
  77   opts = arch_dep_none;
  78
  79   if (factory != NULL)
  80     params = factory();
  81
  82   if (! op_Mulh) {
  83     rflct_sig_t *sig;
  84     int mulh_opc = get_next_ir_opcode();
  85
  86     /* create the Mulh operation */
  87     op_Mulh = new_ir_op(mulh_opc, "Mulh",  op_pin_state_floats, irop_flag_commutative, oparity_binary, 0, 0, NULL);
  88     sig = rflct_signature_allocate(1, 3);
  89     rflct_signature_set_arg(sig, 0, 0, "Res", RFLCT_MC(Int), 0, 0);
  90     rflct_signature_set_arg(sig, 1, 0, "Block", RFLCT_MC(BB), 0, 0);
  91     rflct_signature_set_arg(sig, 1, 1, "Op 0", RFLCT_MC(Int), 0, 0);
  92     rflct_signature_set_arg(sig, 1, 2, "Op 1", RFLCT_MC(Int), 0, 0);
  93
  94     rflct_new_opcode(mulh_opc, "Mulh", 0);
  95     rflct_opcode_add_signature(mulh_opc, sig);
  96   }
  97 }
  98
  99 void arch_dep_set_opts(arch_dep_opts_t the_opts) {
 100   opts = the_opts;
 101 }
 102
 103 /* check, whether a mode allows a Mulh instruction */
 104 static int allow_Mulh(ir_mode *mode)
 105 {
 106   if (get_mode_size_bits(mode) > params->max_bits_for_mulh)
 107     return 0;
 108   return (mode_is_signed(mode) && params->allow_mulhs) || (!mode_is_signed(mode) && params->allow_mulhu);
 109 }
 110
 111 ir_node *arch_dep_replace_mul_with_shifts(ir_node *irn)
 112 {
 113   ir_node *res = irn;
 114   ir_mode *mode = get_irn_mode(irn);
 115
 116   /* If the architecture dependent optimizations were not initialized
 117      or this optimization was not enabled. */
 118   if (params == NULL || (opts & arch_dep_mul_to_shift) == 0)
 119     return irn;
 120
 121   if (get_irn_op(irn) == op_Mul && mode_is_int(mode)) {
 122     ir_node *block   = get_irn_n(irn, -1);
 123     ir_node *left    = get_binop_left(irn);
 124     ir_node *right   = get_binop_right(irn);
 125     tarval *tv       = NULL;
 126     ir_node *operand = NULL;
 127
 128     /* Look, if one operand is a constant. */
 129     if (get_irn_opcode(left) == iro_Const) {
 130       tv = get_Const_tarval(left);
 131       operand = right;
 132     } else if(get_irn_opcode(right) == iro_Const) {
 133       tv = get_Const_tarval(right);
 134       operand = left;
 135     }
 136
 137     if (tv != NULL) {
 138       int maximum_shifts = params->maximum_shifts;
 139       int also_use_subs = params->also_use_subs;
 140       int highest_shift_amount = params->highest_shift_amount;
 141
 142       char *bitstr = get_tarval_bitpattern(tv);
 143       char *p;
 144       int i, last = 0;
 145       int counter = 0;
 146       int curr_bit;
 147       int compr_len = 0;
 148       char compr[MAX_BITSTR];
 149
 150       int singleton;
 151       int end_of_group;
 152       int shift_with_sub[MAX_BITSTR] = { 0 };
 153       int shift_without_sub[MAX_BITSTR] = { 0 };
 154       int shift_with_sub_pos = 0;
 155       int shift_without_sub_pos = 0;
 156
 157 #if DEB
 158       {
 159         long val = get_tarval_long(tv);
 160         fprintf(stderr, "Found mul with %ld(%lx) = ", val, val);
 161         for(p = bitstr; *p != '\0'; p++)
 162           printf("%c", *p);
 163         printf("\n");
 164       }
 165 #endif
 166
 167       for(p = bitstr; *p != '\0'; p++) {
 168         int bit = *p != '0';
 169
 170         if (bit != last) {
 171           /* The last was 1 we are now at 0 OR
 172            * The last was 0 and we are now at 1 */
 173           compr[compr_len++] = counter;
 174           counter = 1;
 175         }
 176         else
 177           counter++;
 178
 179         last = bit;
 180       }
 181       compr[compr_len++] = counter;
 182
 183
 184 #ifdef DEB
 185       {
 186         const char *prefix = "";
 187         for(i = 0; i < compr_len; i++, prefix = ",")
 188           fprintf(stderr, "%s%d", prefix, compr[i]);
 189         fprintf("\n");
 190       }
 191 #endif
 192
 193       // Go over all recorded one groups.
 194       curr_bit = compr[0];
 195
 196       for(i = 1; i < compr_len; i = end_of_group + 2) {
 197         int j, zeros_in_group, ones_in_group;
 198
 199         ones_in_group = compr[i];
 200         zeros_in_group = 0;
 201
 202         // Scan for singular 0s in a sequence
 203         for(j = i + 1; j < compr_len && compr[j] == 1; j += 2) {
 204           zeros_in_group += 1;
 205           ones_in_group += (j + 1 < compr_len ? compr[j + 1] : 0);
 206         }
 207         end_of_group = j - 1;
 208
 209         if(zeros_in_group >= ones_in_group - 1)
 210           end_of_group = i;
 211
 212 #ifdef DEB
 213         fprintf(stderr, "  i:%d, eg:%d\n", i, end_of_group);
 214 #endif
 215
 216         singleton = compr[i] == 1 && i == end_of_group;
 217         for(j = i; j <= end_of_group; j += 2) {
 218           int curr_ones = compr[j];
 219           int biased_curr_bit = curr_bit + 1;
 220           int k;
 221
 222 #ifdef DEB
 223           fprintf(stderr, "    j:%d, ones:%d\n", j, curr_ones);
 224 #endif
 225
 226           // If this ones group is a singleton group (it has no
 227           // singleton zeros inside
 228           if(singleton)
 229             shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
 230           else if(j == i)
 231             shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
 232
 233           for(k = 0; k < curr_ones; k++)
 234             shift_without_sub[shift_without_sub_pos++] = biased_curr_bit + k;
 235
 236           curr_bit += curr_ones;
 237           biased_curr_bit = curr_bit + 1;
 238
 239           if(!singleton && j == end_of_group)
 240             shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
 241           else if(j != end_of_group)
 242             shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
 243
 244           curr_bit += compr[j + 1];
 245         }
 246
 247       }
 248
 249       {
 250         int *shifts = shift_with_sub;
 251         int n = shift_with_sub_pos;
 252         int highest_shift_wide = 0;
 253         int highest_shift_seq = 0;
 254         int last_shift = 0;
 255
 256         /* If we may not use subs, or we can achive the same with adds,
 257            prefer adds. */
 258         if(!also_use_subs || shift_with_sub_pos >= shift_without_sub_pos) {
 259           shifts = shift_without_sub;
 260           n = shift_without_sub_pos;
 261         }
 262
 263         /* If the number of needed shifts exceeds the given maximum,
 264            use the Mul and exit. */
 265         if(n > maximum_shifts) {
 266 #ifdef DEB
 267           fprintf(stderr, "Only allowed %d shifts, but %d are needed\n",
 268               maximum_shifts, n);
 269 #endif
 270           return irn;
 271         }
 272
 273         /* Compute the highest shift needed for both, the
 274            sequential and wide representations. */
 275         for(i = 0; i < n; i++) {
 276           int curr = abs(shifts[i]);
 277           int curr_seq = curr - last;
 278
 279           highest_shift_wide = curr > highest_shift_wide ? curr
 280             : highest_shift_wide;
 281           highest_shift_seq = curr_seq > highest_shift_seq ? curr_seq
 282             : highest_shift_seq;
 283
 284           last_shift = curr;
 285         }
 286
 287         /* If the highest shift amount is greater than the given limit,
 288            give back the Mul */
 289         if(highest_shift_seq > highest_shift_amount) {
 290 #ifdef DEB
 291           fprintf(stderr, "Shift argument %d exceeds maximum %d\n",
 292               highest_shift_seq, highest_shift_amount);
 293 #endif
 294           return irn;
 295         }
 296
 297         /* If we have subs, we cannot do sequential. */
 298         if(1 /* also_use_subs */) {
 299           if(n > 0) {
 300             ir_node *curr = NULL;
 301
 302             i = n - 1;
 303
 304             do {
 305               int curr_shift = shifts[i];
 306               int sub = curr_shift < 0;
 307               int amount = abs(curr_shift) - 1;
 308               ir_node *aux = operand;
 309
 310               assert(amount >= 0 && "What is a negative shift??");
 311
 312               if (amount != 0) {
 313                 ir_node *cnst = new_r_Const_long(current_ir_graph, block, mode_Iu, amount);
 314                 aux = new_r_Shl(current_ir_graph, block, operand, cnst, mode);
 315               }
 316
 317               if (curr) {
 318                 if (sub)
 319                   curr = new_r_Sub(current_ir_graph, block, curr, aux, mode);
 320                 else
 321                   curr = new_r_Add(current_ir_graph, block, curr, aux, mode);
 322               } else
 323                 curr = aux;
 324
 325             } while(--i >= 0);
 326
 327             res = curr;
 328           }
 329         }
 330
 331 #ifdef DEB
 332         {
 333           const char *prefix = "";
 334           for (i = 0; i < n; ++i) {
 335             fprintf(stderr, "%s%d", prefix, shifts[i]);
 336             prefix = ", ";
 337           }
 338           fprintf(stderr, "\n");
 339         }
 340 #endif
 341
 342       }
 343
 344       if(bitstr)
 345         free(bitstr);
 346     }
 347
 348   }
 349
 350   if (res != irn)
 351     hook_arch_dep_replace_mul_with_shifts(irn);
 352
 353   return res;
 354 }
 355
 356 /**
 357  * calculated the ld2 of a tarval if tarval is 2^n, else returns -1.
 358  */
 359 static int tv_ld2(tarval *tv, int bits)
 360 {
 361   int i, k, num;
 362
 363   for (num = i = 0; i < bits; ++i) {
 364     unsigned char v = get_tarval_sub_bits(tv, i);
 365
 366     if (v) {
 367       int j;
 368
 369       for (j = 0; j < 8; ++j)
 370         if ((1 << j) & v) {
 371           ++num;
 372           k = 8 * i + j;
 373         }
 374     }
 375   }
 376   if (num == 1)
 377     return k;
 378   return -1;
 379 }
 380
 381
 382 /* for shorter lines */
 383 #define ABS(a)    tarval_abs(a)
 384 #define NEG(a)    tarval_neg(a)
 385 #define NOT(a)    tarval_not(a)
 386 #define SHL(a, b) tarval_shl(a, b)
 387 #define SHR(a, b) tarval_shr(a, b)
 388 #define ADD(a, b) tarval_add(a, b)
 389 #define SUB(a, b) tarval_sub(a, b)
 390 #define MUL(a, b) tarval_mul(a, b)
 391 #define DIV(a, b) tarval_div(a, b)
 392 #define MOD(a, b) tarval_mod(a, b)
 393 #define CMP(a, b) tarval_cmp(a, b)
 394 #define CNV(a, m) tarval_convert_to(a, m)
 395 #define ONE(m)    get_mode_one(m)
 396 #define ZERO(m)   get_mode_null(m)
 397
 398 struct ms {
 399   tarval *M;        /**< magic number */
 400   int s;            /**< shift amount */
 401   int need_add;     /**< an additional add is needed */
 402   int need_sub;     /**< an additional sub is needed */
 403 };
 404
 405 /**
 406  * Signed division by constant d: calculate the Magic multiplier M and the shift amount s
 407  *
 408  * see Hacker's Delight: 10-6 Integer Division by Constants: Incorporation into a Compiler
 409  */
 410 static struct ms magic(tarval *d)
 411 {
 412   ir_mode *mode   = get_tarval_mode(d);
 413   ir_mode *u_mode = find_unsigned_mode(mode);
 414   int bits        = get_mode_size_bits(u_mode);
 415   int p;
 416   tarval *ad, *anc, *delta, *q1, *r1, *q2, *r2, *t;     /* unsigned */
 417   pn_Cmp d_cmp, M_cmp;
 418
 419   tarval *bits_minus_1, *two_bits_1;
 420
 421   struct ms mag;
 422
 423   tarval_int_overflow_mode_t rem = tarval_get_integer_overflow_mode();
 424
 425   /* we need overflow mode to work correctly */
 426   tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
 427
 428   /* 2^(bits-1) */
 429   bits_minus_1 = new_tarval_from_long(bits - 1, u_mode);
 430   two_bits_1   = SHL(get_mode_one(u_mode), bits_minus_1);
 431
 432   ad  = CNV(ABS(d), u_mode);
 433   t   = ADD(two_bits_1, SHR(CNV(d, u_mode), bits_minus_1));
 434   anc = SUB(SUB(t, ONE(u_mode)), MOD(t, ad));   /* Absolute value of nc */
 435   p   = bits - 1;                               /* Init: p */
 436   q1  = DIV(two_bits_1, anc);                   /* Init: q1 = 2^p/|nc| */
 437   r1  = SUB(two_bits_1, MUL(q1, anc));          /* Init: r1 = rem(2^p, |nc|) */
 438   q2  = DIV(two_bits_1, ad);                    /* Init: q2 = 2^p/|d| */
 439   r2  = SUB(two_bits_1, MUL(q2, ad));           /* Init: r2 = rem(2^p, |d|) */
 440
 441   do {
 442     ++p;
 443     q1 = ADD(q1, q1);                           /* Update q1 = 2^p/|nc| */
 444     r1 = ADD(r1, r1);                           /* Update r1 = rem(2^p, |nc|) */
 445
 446     if (CMP(r1, anc) & pn_Cmp_Ge) {
 447       q1 = ADD(q1, ONE(u_mode));
 448       r1 = SUB(r1, anc);
 449     }
 450
 451     q2 = ADD(q2, q2);                           /* Update q2 = 2^p/|d| */
 452     r2 = ADD(r2, r2);                           /* Update r2 = rem(2^p, |d|) */
 453
 454     if (CMP(r2, ad) & pn_Cmp_Ge) {
 455       q2 = ADD(q2, ONE(u_mode));
 456       r2 = SUB(r2, ad);
 457     }
 458
 459     delta = SUB(ad, r2);
 460   } while (CMP(q1, delta) & pn_Cmp_Lt || (CMP(q1, delta) & pn_Cmp_Eq && CMP(r1, ZERO(u_mode)) & pn_Cmp_Eq));
 461
 462   d_cmp = CMP(d, ZERO(mode));
 463
 464   if (d_cmp & pn_Cmp_Ge)
 465     mag.M = ADD(CNV(q2, mode), ONE(mode));
 466   else
 467     mag.M = SUB(ZERO(mode), ADD(CNV(q2, mode), ONE(mode)));
 468
 469   M_cmp = CMP(mag.M, ZERO(mode));
 470
 471   mag.s = p - bits;
 472
 473   /* need an add if d > 0 && M < 0 */
 474   mag.need_add = d_cmp & pn_Cmp_Gt && M_cmp & pn_Cmp_Lt;
 475
 476   /* need a sub if d < 0 && M > 0 */
 477   mag.need_sub = d_cmp & pn_Cmp_Lt && M_cmp & pn_Cmp_Gt;
 478
 479   tarval_set_integer_overflow_mode(rem);
 480
 481   return mag;
 482 }
 483
 484 struct mu {
 485   tarval *M;        /**< magic add constant */
 486   int s;            /**< shift amount */
 487   int need_add;     /**< add indicator */
 488 };
 489
 490 /**
 491  * Unsigned division by constant d: calculate the Magic multiplier M and the shift amount s
 492  *
 493  * see Hacker's Delight: 10-10 Integer Division by Constants: Incorporation into a Compiler (Unsigned)
 494  */
 495 static struct mu magicu(tarval *d)
 496 {
 497   ir_mode *mode   = get_tarval_mode(d);
 498   int bits        = get_mode_size_bits(mode);
 499   int p;
 500   tarval *nc, *delta, *q1, *r1, *q2, *r2;
 501   tarval *bits_minus_1, *two_bits_1, *seven_ff;
 502
 503   struct mu magu;
 504
 505   tarval_int_overflow_mode_t rem = tarval_get_integer_overflow_mode();
 506
 507   /* we need overflow mode to work correctly */
 508   tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
 509
 510   bits_minus_1 = new_tarval_from_long(bits - 1, mode);
 511   two_bits_1   = SHL(get_mode_one(mode), bits_minus_1);
 512   seven_ff     = SUB(two_bits_1, ONE(mode));
 513
 514   magu.need_add = 0;                            /* initialize the add indicator */
 515   nc = SUB(NEG(ONE(mode)), MOD(NEG(d), d));
 516   p  = bits - 1;                                /* Init: p */
 517   q1 = DIV(two_bits_1, nc);                     /* Init: q1 = 2^p/nc */
 518   r1 = SUB(two_bits_1, MUL(q1, nc));            /* Init: r1 = rem(2^p, nc) */
 519   q2 = DIV(seven_ff, d);                        /* Init: q2 = (2^p - 1)/d */
 520   r2 = SUB(seven_ff, MUL(q2, d));               /* Init: r2 = rem(2^p - 1, d) */
 521
 522   do {
 523     ++p;
 524     if (CMP(r1, SUB(nc, r1)) & pn_Cmp_Ge) {
 525       q1 = ADD(ADD(q1, q1), ONE(mode));
 526       r1 = SUB(ADD(r1, r1), nc);
 527     }
 528     else {
 529       q1 = ADD(q1, q1);
 530       r1 = ADD(r1, r1);
 531     }
 532
 533     if (CMP(ADD(r2, ONE(mode)), SUB(d, r2)) & pn_Cmp_Ge) {
 534       if (CMP(q2, seven_ff) & pn_Cmp_Ge)
 535         magu.need_add = 1;
 536
 537       q2 = ADD(ADD(q2, q2), ONE(mode));
 538       r2 = SUB(ADD(ADD(r2, r2), ONE(mode)), d);
 539     }
 540     else {
 541       if (CMP(q2, two_bits_1) & pn_Cmp_Ge)
 542         magu.need_add = 1;
 543
 544       q2 = ADD(q2, q2);
 545       r2 = ADD(ADD(r2, r2), ONE(mode));
 546     }
 547     delta = SUB(SUB(d, ONE(mode)), r2);
 548   } while (p < 2*bits &&
 549           (CMP(q1, delta) & pn_Cmp_Lt || (CMP(q1, delta) & pn_Cmp_Eq && CMP(r1, ZERO(mode)) & pn_Cmp_Eq)));
 550
 551   magu.M = ADD(q2, ONE(mode));                       /* Magic number */
 552   magu.s = p - bits;                                 /* and shift amount */
 553
 554   tarval_set_integer_overflow_mode(rem);
 555
 556   return magu;
 557 }
 558
 559 /**
 560  * build the Mulh replacement code for n / tv
 561  *
 562  * Note that 'div' might be a mod or DivMod operation as well
 563  */
 564 static ir_node *replace_div_by_mulh(ir_node *div, tarval *tv)
 565 {
 566   dbg_info *dbg  = get_irn_dbg_info(div);
 567   ir_node *n     = get_binop_left(div);
 568   ir_node *block = get_irn_n(div, -1);
 569   ir_mode *mode  = get_irn_mode(n);
 570   int bits       = get_mode_size_bits(mode);
 571   ir_node *q, *t, *c;
 572
 573   /* Beware: do not transform bad code */
 574   if (is_Bad(n) || is_Bad(block))
 575     return div;
 576
 577   if (mode_is_signed(mode)) {
 578     struct ms mag = magic(tv);
 579
 580     /* generate the Mulh instruction */
 581     c = new_r_Const(current_ir_graph, block, mode, mag.M);
 582     q = new_rd_Mulh(dbg, current_ir_graph, block, n, c, mode);
 583
 584     /* do we need an Add or Sub */
 585     if (mag.need_add)
 586       q = new_rd_Add(dbg, current_ir_graph, block, q, n, mode);
 587     else if (mag.need_sub)
 588       q = new_rd_Sub(dbg, current_ir_graph, block, q, n, mode);
 589
 590     /* Do we need the shift */
 591     if (mag.s > 0) {
 592       c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s);
 593       q    = new_rd_Shrs(dbg, current_ir_graph, block, q, c, mode);
 594     }
 595
 596     /* final */
 597     c = new_r_Const_long(current_ir_graph, block, mode_Iu, bits-1);
 598     t = new_rd_Shr(dbg, current_ir_graph, block, q, c, mode);
 599
 600     q = new_rd_Add(dbg, current_ir_graph, block, q, t, mode);
 601   }
 602   else {
 603     struct mu mag = magicu(tv);
 604     ir_node *c;
 605
 606     /* generate the Mulh instruction */
 607     c = new_r_Const(current_ir_graph, block, mode, mag.M);
 608     q    = new_rd_Mulh(dbg, current_ir_graph, block, n, c, mode);
 609
 610     if (mag.need_add) {
 611       if (mag.s > 0) {
 612         /* use the GM scheme */
 613         t = new_rd_Sub(dbg, current_ir_graph, block, n, q, mode);
 614
 615         c = new_r_Const(current_ir_graph, block, mode_Iu, get_mode_one(mode_Iu));
 616         t = new_rd_Shr(dbg, current_ir_graph, block, t, c, mode);
 617
 618         t = new_rd_Add(dbg, current_ir_graph, block, t, q, mode);
 619
 620         c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s-1);
 621         q = new_rd_Shr(dbg, current_ir_graph, block, t, c, mode);
 622       }
 623       else {
 624         /* use the default scheme */
 625         q = new_rd_Add(dbg, current_ir_graph, block, q, n, mode);
 626       }
 627     }
 628     else if (mag.s > 0) { /* default scheme, shift needed */
 629       c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s);
 630       q = new_rd_Shr(dbg, current_ir_graph, block, q, c, mode);
 631     }
 632   }
 633   return q;
 634 }
 635
 636 ir_node *arch_dep_replace_div_by_const(ir_node *irn)
 637 {
 638   ir_node *res  = irn;
 639
 640   /* If the architecture dependent optimizations were not initialized
 641      or this optimization was not enabled. */
 642   if (params == NULL || (opts & arch_dep_div_by_const) == 0)
 643     return irn;
 644
 645   if (get_irn_opcode(irn) == iro_Div) {
 646     ir_node *c = get_Div_right(irn);
 647     ir_node *block, *left;
 648     ir_mode *mode;
 649     tarval *tv, *ntv;
 650     dbg_info *dbg;
 651     int n, bits;
 652     int k, n_flag;
 653
 654     if (get_irn_op(c) != op_Const)
 655       return irn;
 656
 657     left  = get_Div_left(irn);
 658     mode  = get_irn_mode(left);
 659     block = get_irn_n(irn, -1);
 660     dbg   = get_irn_dbg_info(irn);
 661     tv    = get_Const_tarval(c);
 662
 663     bits = get_mode_size_bits(mode);
 664     n    = (bits + 7) / 8;
 665
 666     k = -1;
 667     if (mode_is_signed(mode)) {
 668       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 669       ntv = tarval_neg(tv);
 670       n_flag = 1;
 671       k = tv_ld2(ntv, n);
 672     }
 673
 674     if (k < 0) {
 675       n_flag = 0;
 676       k = tv_ld2(tv, n);
 677     }
 678
 679     if (k >= 0) { /* division by 2^k or -2^k */
 680       if (mode_is_signed(mode)) {
 681         ir_node *k_node;
 682         ir_node *curr = left;
 683
 684         if (k != 1) {
 685           k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
 686           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 687         }
 688
 689         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
 690         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 691
 692         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 693
 694         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 695         res    = new_rd_Shrs(dbg, current_ir_graph, block, curr, k_node, mode);
 696
 697         if (n_flag) { /* negate the result */
 698           ir_node *k_node;
 699
 700           k_node = new_r_Const(current_ir_graph, block, mode, get_mode_null(mode));
 701           res = new_rd_Sub(dbg, current_ir_graph, block, k_node, res, mode);
 702         }
 703       }
 704       else {      /* unsigned case */
 705         ir_node *k_node;
 706
 707         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 708         res    = new_rd_Shr(dbg, current_ir_graph, block, left, k_node, mode);
 709       }
 710     }
 711     else {
 712       /* other constant */
 713       if (allow_Mulh(mode))
 714         res = replace_div_by_mulh(irn, tv);
 715     }
 716   }
 717
 718   if (res != irn)
 719     hook_arch_dep_replace_division_by_const(irn);
 720
 721   return res;
 722 }
 723
 724 ir_node *arch_dep_replace_mod_by_const(ir_node *irn)
 725 {
 726   ir_node *res  = irn;
 727
 728   /* If the architecture dependent optimizations were not initialized
 729      or this optimization was not enabled. */
 730   if (params == NULL || (opts & arch_dep_mod_by_const) == 0)
 731     return irn;
 732
 733   if (get_irn_opcode(irn) == iro_Mod) {
 734     ir_node *c = get_Mod_right(irn);
 735     ir_node *block, *left;
 736     ir_mode *mode;
 737     tarval *tv, *ntv;
 738     dbg_info *dbg;
 739     int n, bits;
 740     int k;
 741
 742     if (get_irn_op(c) != op_Const)
 743       return irn;
 744
 745     left  = get_Mod_left(irn);
 746     mode  = get_irn_mode(left);
 747     block = get_irn_n(irn, -1);
 748     dbg   = get_irn_dbg_info(irn);
 749     tv    = get_Const_tarval(c);
 750
 751     bits = get_mode_size_bits(mode);
 752     n    = (bits + 7) / 8;
 753
 754     k = -1;
 755     if (mode_is_signed(mode)) {
 756       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 757       ntv = tarval_neg(tv);
 758       k = tv_ld2(ntv, n);
 759     }
 760
 761     if (k < 0) {
 762       k = tv_ld2(tv, n);
 763     }
 764
 765     if (k >= 0) {
 766       /* division by 2^k or -2^k:
 767        * we use "modulus" here, so x % y == x % -y that's why is no difference between the case 2^k and -2^k
 768        */
 769       if (mode_is_signed(mode)) {
 770         ir_node *k_node;
 771         ir_node *curr = left;
 772
 773         if (k != 1) {
 774           k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
 775           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 776         }
 777
 778         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
 779         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 780
 781         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 782
 783         k_node = new_r_Const_long(current_ir_graph, block, mode, (-1) << k);
 784         curr   = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
 785
 786         res    = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
 787       }
 788       else {      /* unsigned case */
 789         ir_node *k_node;
 790
 791         k_node = new_r_Const_long(current_ir_graph, block, mode, (1 << k) - 1);
 792         res    = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
 793       }
 794     }
 795     else {
 796       /* other constant */
 797       if (allow_Mulh(mode)) {
 798         res = replace_div_by_mulh(irn, tv);
 799
 800         res = new_rd_Mul(dbg, current_ir_graph, block, res, c, mode);
 801
 802         /* res = arch_dep_mul_to_shift(res); */
 803
 804         res = new_rd_Sub(dbg, current_ir_graph, block, left, res, mode);
 805       }
 806     }
 807   }
 808
 809   if (res != irn)
 810     hook_arch_dep_replace_division_by_const(irn);
 811
 812   return res;
 813 }
 814
 815 void arch_dep_replace_divmod_by_const(ir_node **div, ir_node **mod, ir_node *irn)
 816 {
 817   *div = *mod = NULL;
 818
 819   /* If the architecture dependent optimizations were not initialized
 820      or this optimization was not enabled. */
 821   if (params == NULL ||
 822       ((opts & (arch_dep_div_by_const|arch_dep_mod_by_const)) != (arch_dep_div_by_const|arch_dep_mod_by_const)))
 823     return;
 824
 825   if (get_irn_opcode(irn) == iro_DivMod) {
 826     ir_node *c = get_DivMod_right(irn);
 827     ir_node *block, *left;
 828     ir_mode *mode;
 829     tarval *tv, *ntv;
 830     dbg_info *dbg;
 831     int n, bits;
 832     int k, n_flag;
 833
 834     if (get_irn_op(c) != op_Const)
 835       return;
 836
 837     left  = get_DivMod_left(irn);
 838     mode  = get_irn_mode(left);
 839     block = get_irn_n(irn, -1);
 840     dbg   = get_irn_dbg_info(irn);
 841     tv    = get_Const_tarval(c);
 842
 843     bits = get_mode_size_bits(mode);
 844     n    = (bits + 7) / 8;
 845
 846     k = -1;
 847     if (mode_is_signed(mode)) {
 848       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 849       ntv = tarval_neg(tv);
 850       n_flag = 1;
 851       k = tv_ld2(ntv, n);
 852     }
 853
 854     if (k < 0) {
 855       n_flag = 0;
 856       k = tv_ld2(tv, n);
 857     }
 858
 859     if (k >= 0) { /* division by 2^k or -2^k */
 860       if (mode_is_signed(mode)) {
 861         ir_node *k_node, *c_k;
 862         ir_node *curr = left;
 863
 864         if (k != 1) {
 865           k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
 866           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 867         }
 868
 869         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
 870         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 871
 872         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 873
 874         c_k    = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 875
 876         *div   = new_rd_Shrs(dbg, current_ir_graph, block, curr, c_k, mode);
 877
 878         if (n_flag) { /* negate the div result */
 879           ir_node *k_node;
 880
 881           k_node = new_r_Const(current_ir_graph, block, mode, get_mode_null(mode));
 882           *div = new_rd_Sub(dbg, current_ir_graph, block, k_node, *div, mode);
 883         }
 884
 885         k_node = new_r_Const_long(current_ir_graph, block, mode, (-1) << k);
 886         curr   = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
 887
 888         *mod   = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
 889       }
 890       else {      /* unsigned case */
 891         ir_node *k_node;
 892
 893         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 894         *div   = new_rd_Shr(dbg, current_ir_graph, block, left, k_node, mode);
 895
 896         k_node = new_r_Const_long(current_ir_graph, block, mode, (1 << k) - 1);
 897         *mod   = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
 898       }
 899     }
 900     else {
 901       /* other constant */
 902       if (allow_Mulh(mode)) {
 903         ir_node *t;
 904
 905         *div = replace_div_by_mulh(irn, tv);
 906
 907         t    = new_rd_Mul(dbg, current_ir_graph, block, *div, c, mode);
 908
 909         /* t = arch_dep_mul_to_shift(t); */
 910
 911         *mod = new_rd_Sub(dbg, current_ir_graph, block, left, t, mode);
 912       }
 913     }
 914   }
 915
 916   if (*div)
 917     hook_arch_dep_replace_division_by_const(irn);
 918 }
 919
 920
 921 static const arch_dep_params_t default_params = {
 922   1,  /* also use subs */
 923   4,  /* maximum shifts */
 924   31, /* maximum shift amount */
 925
 926   0,  /* allow Mulhs */
 927   0,  /* allow Mulus */
 928   32  /* Mulh allowed up to 32 bit */
 929 };
 930
 931 const arch_dep_params_t *arch_dep_default_factory(void) {
 932   return &default_params;
 933 }