nsz Git - libfirm/blob - ir/ir/irarch.c

   1 /*
   2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
   3  *
   4  * This file is part of libFirm.
   5  *
   6  * This file may be distributed and/or modified under the terms of the
   7  * GNU General Public License version 2 as published by the Free Software
   8  * Foundation and appearing in the file LICENSE.GPL included in the
   9  * packaging of this file.
  10  *
  11  * Licensees holding valid libFirm Professional Edition licenses may use
  12  * this file in accordance with the libFirm Commercial License.
  13  * Agreement provided with the Software.
  14  *
  15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
  16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17  * PURPOSE.
  18  */
  19
  20 /**
  21  * @file
  22  * @brief   Machine dependent Firm optimizations.
  23  * @date    28.9.2004
  24  * @author  Sebastian Hack, Michael Beck
  25  * @version $Id$
  26  */
  27 #ifdef HAVE_CONFIG_H
  28 # include "config.h"
  29 #endif
  30
  31 #ifdef HAVE_STDLIB_H
  32 # include <stdlib.h>
  33 #endif
  34
  35 #include <assert.h>
  36
  37 #include "irnode_t.h"
  38 #include "irgraph_t.h"
  39 #include "irmode_t.h"
  40 #include "iropt_t.h"
  41 #include "ircons_t.h"
  42 #include "irgmod.h"
  43 #include "irvrfy.h"
  44 #include "tv_t.h"
  45 #include "dbginfo_t.h"
  46 #include "iropt_dbg.h"
  47 #include "irflag_t.h"
  48 #include "irhooks.h"
  49 #include "ircons.h"
  50 #include "irarch.h"
  51
  52 #undef DEB
  53
  54 #define MAX_BITSTR 64
  55
  56 /* when we need verifying */
  57 #ifdef NDEBUG
  58 # define IRN_VRFY_IRG(res, irg)
  59 #else
  60 # define IRN_VRFY_IRG(res, irg)  irn_vrfy_irg(res, irg)
  61 #endif
  62
  63 /** The params got from the factory in arch_dep_init(...). */
  64 static const arch_dep_params_t *params = NULL;
  65
  66 /** The bit mask, which optimizations to apply. */
  67 static arch_dep_opts_t opts;
  68
  69 /* we need this new pseudo op */
  70 static ir_op *op_Mulh = NULL;
  71
  72 /**
  73  * construct a Mulh: Mulh(a,b) = (a * b) >> w, w is the with in bits of a, b
  74  */
  75 static ir_node *
  76 new_rd_Mulh (dbg_info *db, ir_graph *irg, ir_node *block,
  77        ir_node *op1, ir_node *op2, ir_mode *mode)
  78 {
  79   ir_node *in[2];
  80   ir_node *res;
  81
  82   in[0] = op1;
  83   in[1] = op2;
  84   res = new_ir_node(db, irg, block, op_Mulh, mode, 2, in);
  85   res = optimize_node(res);
  86   IRN_VRFY_IRG(res, irg);
  87   return res;
  88 }
  89
  90 ir_op *get_op_Mulh(void)  { return op_Mulh; }
  91
  92 void arch_dep_init(arch_dep_params_factory_t factory)
  93 {
  94   opts = arch_dep_none;
  95
  96   if (factory != NULL)
  97     params = factory();
  98
  99   if (! op_Mulh) {
 100     int mulh_opc = get_next_ir_opcode();
 101
 102     /* create the Mulh operation */
 103     op_Mulh = new_ir_op(mulh_opc, "Mulh",  op_pin_state_floats, irop_flag_commutative, oparity_binary, 0, 0, NULL);
 104   }
 105 }
 106
 107 void arch_dep_set_opts(arch_dep_opts_t the_opts) {
 108   opts = the_opts;
 109 }
 110
 111 /** check, whether a mode allows a Mulh instruction. */
 112 static int allow_Mulh(ir_mode *mode)
 113 {
 114   if (get_mode_size_bits(mode) > params->max_bits_for_mulh)
 115     return 0;
 116   return (mode_is_signed(mode) && params->allow_mulhs) || (!mode_is_signed(mode) && params->allow_mulhu);
 117 }
 118
 119 /* Replace Muls with Shifts and Add/Subs. */
 120 ir_node *arch_dep_replace_mul_with_shifts(ir_node *irn)
 121 {
 122   ir_node *res = irn;
 123   ir_mode *mode = get_irn_mode(irn);
 124
 125   /* If the architecture dependent optimizations were not initialized
 126      or this optimization was not enabled. */
 127   if (params == NULL || (opts & arch_dep_mul_to_shift) == 0)
 128     return irn;
 129
 130   if (get_irn_op(irn) == op_Mul && mode_is_int(mode)) {
 131     ir_node *block   = get_irn_n(irn, -1);
 132     ir_node *left    = get_binop_left(irn);
 133     ir_node *right   = get_binop_right(irn);
 134     tarval *tv       = NULL;
 135     ir_node *operand = NULL;
 136
 137     /* Look, if one operand is a constant. */
 138     if (get_irn_opcode(left) == iro_Const) {
 139       tv = get_Const_tarval(left);
 140       operand = right;
 141     } else if(get_irn_opcode(right) == iro_Const) {
 142       tv = get_Const_tarval(right);
 143       operand = left;
 144     }
 145
 146     if (tv != NULL) {
 147       int maximum_shifts = params->maximum_shifts;
 148       int also_use_subs = params->also_use_subs;
 149       int highest_shift_amount = params->highest_shift_amount;
 150
 151       char *bitstr = get_tarval_bitpattern(tv);
 152       char *p;
 153       int i, last = 0;
 154       int counter = 0;
 155       int curr_bit;
 156       int compr_len = 0;
 157       char compr[MAX_BITSTR];
 158
 159       int singleton;
 160       int end_of_group;
 161       int shift_with_sub[MAX_BITSTR] = { 0 };
 162       int shift_without_sub[MAX_BITSTR] = { 0 };
 163       int shift_with_sub_pos = 0;
 164       int shift_without_sub_pos = 0;
 165
 166 #if DEB
 167       {
 168         long val = get_tarval_long(tv);
 169         fprintf(stderr, "Found mul with %ld(%lx) = ", val, val);
 170         for(p = bitstr; *p != '\0'; p++)
 171           printf("%c", *p);
 172         printf("\n");
 173       }
 174 #endif
 175
 176       for(p = bitstr; *p != '\0'; p++) {
 177         int bit = *p != '0';
 178
 179         if (bit != last) {
 180           /* The last was 1 we are now at 0 OR
 181            * The last was 0 and we are now at 1 */
 182           compr[compr_len++] = counter;
 183           counter = 1;
 184         }
 185         else
 186           counter++;
 187
 188         last = bit;
 189       }
 190       compr[compr_len++] = counter;
 191
 192
 193 #ifdef DEB
 194       {
 195         const char *prefix = "";
 196         for(i = 0; i < compr_len; i++, prefix = ",")
 197           fprintf(stderr, "%s%d", prefix, compr[i]);
 198         fprintf("\n");
 199       }
 200 #endif
 201
 202       /* Go over all recorded one groups. */
 203       curr_bit = compr[0];
 204
 205       for(i = 1; i < compr_len; i = end_of_group + 2) {
 206         int j, zeros_in_group, ones_in_group;
 207
 208         ones_in_group = compr[i];
 209         zeros_in_group = 0;
 210
 211         /* Scan for singular 0s in a sequence. */
 212         for(j = i + 1; j < compr_len && compr[j] == 1; j += 2) {
 213           zeros_in_group += 1;
 214           ones_in_group += (j + 1 < compr_len ? compr[j + 1] : 0);
 215         }
 216         end_of_group = j - 1;
 217
 218         if(zeros_in_group >= ones_in_group - 1)
 219           end_of_group = i;
 220
 221 #ifdef DEB
 222         fprintf(stderr, "  i:%d, eg:%d\n", i, end_of_group);
 223 #endif
 224
 225         singleton = compr[i] == 1 && i == end_of_group;
 226         for(j = i; j <= end_of_group; j += 2) {
 227           int curr_ones = compr[j];
 228           int biased_curr_bit = curr_bit + 1;
 229           int k;
 230
 231 #ifdef DEB
 232           fprintf(stderr, "    j:%d, ones:%d\n", j, curr_ones);
 233 #endif
 234
 235           /* If this ones group is a singleton group (it has no
 236              singleton zeros inside. */
 237           if(singleton)
 238             shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
 239           else if(j == i)
 240             shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
 241
 242           for(k = 0; k < curr_ones; k++)
 243             shift_without_sub[shift_without_sub_pos++] = biased_curr_bit + k;
 244
 245           curr_bit += curr_ones;
 246           biased_curr_bit = curr_bit + 1;
 247
 248           if(!singleton && j == end_of_group)
 249             shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
 250           else if(j != end_of_group)
 251             shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
 252
 253           curr_bit += compr[j + 1];
 254         }
 255
 256       }
 257
 258       {
 259         int *shifts = shift_with_sub;
 260         int n = shift_with_sub_pos;
 261         int highest_shift_wide = 0;
 262         int highest_shift_seq = 0;
 263         int last_shift = 0;
 264
 265         /* If we may not use subs, or we can achive the same with adds,
 266            prefer adds. */
 267         if(!also_use_subs || shift_with_sub_pos >= shift_without_sub_pos) {
 268           shifts = shift_without_sub;
 269           n = shift_without_sub_pos;
 270         }
 271
 272         /* If the number of needed shifts exceeds the given maximum,
 273            use the Mul and exit. */
 274         if(n > maximum_shifts) {
 275 #ifdef DEB
 276           fprintf(stderr, "Only allowed %d shifts, but %d are needed\n",
 277               maximum_shifts, n);
 278 #endif
 279           return irn;
 280         }
 281
 282         /* Compute the highest shift needed for both, the
 283            sequential and wide representations. */
 284         for(i = 0; i < n; i++) {
 285           int curr = abs(shifts[i]);
 286           int curr_seq = curr - last;
 287
 288           highest_shift_wide = curr > highest_shift_wide ? curr
 289             : highest_shift_wide;
 290           highest_shift_seq = curr_seq > highest_shift_seq ? curr_seq
 291             : highest_shift_seq;
 292
 293           last_shift = curr;
 294         }
 295
 296         /* If the highest shift amount is greater than the given limit,
 297            give back the Mul */
 298         if(highest_shift_seq > highest_shift_amount) {
 299 #ifdef DEB
 300           fprintf(stderr, "Shift argument %d exceeds maximum %d\n",
 301               highest_shift_seq, highest_shift_amount);
 302 #endif
 303           return irn;
 304         }
 305
 306         /* If we have subs, we cannot do sequential. */
 307         if(1 /* also_use_subs */) {
 308           if(n > 0) {
 309             ir_node *curr = NULL;
 310
 311             i = n - 1;
 312
 313             do {
 314               int curr_shift = shifts[i];
 315               int sub = curr_shift < 0;
 316               int amount = abs(curr_shift) - 1;
 317               ir_node *aux = operand;
 318
 319               assert(amount >= 0 && "What is a negative shift??");
 320
 321               if (amount != 0) {
 322                 ir_node *cnst = new_r_Const_long(current_ir_graph, block, mode_Iu, amount);
 323                 aux = new_r_Shl(current_ir_graph, block, operand, cnst, mode);
 324               }
 325
 326               if (curr) {
 327                 if (sub)
 328                   curr = new_r_Sub(current_ir_graph, block, curr, aux, mode);
 329                 else
 330                   curr = new_r_Add(current_ir_graph, block, curr, aux, mode);
 331               } else
 332                 curr = aux;
 333
 334             } while(--i >= 0);
 335
 336             res = curr;
 337           }
 338         }
 339
 340 #ifdef DEB
 341         {
 342           const char *prefix = "";
 343           for (i = 0; i < n; ++i) {
 344             fprintf(stderr, "%s%d", prefix, shifts[i]);
 345             prefix = ", ";
 346           }
 347           fprintf(stderr, "\n");
 348         }
 349 #endif
 350
 351       }
 352
 353       if(bitstr)
 354         free(bitstr);
 355     }
 356
 357   }
 358
 359   if (res != irn)
 360     hook_arch_dep_replace_mul_with_shifts(irn);
 361
 362   return res;
 363 }
 364
 365 /**
 366  * calculated the ld2 of a tarval if tarval is 2^n, else returns -1.
 367  */
 368 static int tv_ld2(tarval *tv, int bits)
 369 {
 370   int i, k = 0, num;
 371
 372   for (num = i = 0; i < bits; ++i) {
 373     unsigned char v = get_tarval_sub_bits(tv, i);
 374
 375     if (v) {
 376       int j;
 377
 378       for (j = 0; j < 8; ++j)
 379         if ((1 << j) & v) {
 380           ++num;
 381           k = 8 * i + j;
 382         }
 383     }
 384   }
 385   if (num == 1)
 386     return k;
 387   return -1;
 388 }
 389
 390
 391 /* for shorter lines */
 392 #define ABS(a)    tarval_abs(a)
 393 #define NEG(a)    tarval_neg(a)
 394 #define NOT(a)    tarval_not(a)
 395 #define SHL(a, b) tarval_shl(a, b)
 396 #define SHR(a, b) tarval_shr(a, b)
 397 #define ADD(a, b) tarval_add(a, b)
 398 #define SUB(a, b) tarval_sub(a, b)
 399 #define MUL(a, b) tarval_mul(a, b)
 400 #define DIV(a, b) tarval_div(a, b)
 401 #define MOD(a, b) tarval_mod(a, b)
 402 #define CMP(a, b) tarval_cmp(a, b)
 403 #define CNV(a, m) tarval_convert_to(a, m)
 404 #define ONE(m)    get_mode_one(m)
 405 #define ZERO(m)   get_mode_null(m)
 406
 407 /** The result of a the magic() function. */
 408 struct ms {
 409   tarval *M;        /**< magic number */
 410   int s;            /**< shift amount */
 411   int need_add;     /**< an additional add is needed */
 412   int need_sub;     /**< an additional sub is needed */
 413 };
 414
 415 /**
 416  * Signed division by constant d: calculate the Magic multiplier M and the shift amount s
 417  *
 418  * see Hacker's Delight: 10-6 Integer Division by Constants: Incorporation into a Compiler
 419  */
 420 static struct ms magic(tarval *d)
 421 {
 422   ir_mode *mode   = get_tarval_mode(d);
 423   ir_mode *u_mode = find_unsigned_mode(mode);
 424   int bits        = get_mode_size_bits(u_mode);
 425   int p;
 426   tarval *ad, *anc, *delta, *q1, *r1, *q2, *r2, *t;     /* unsigned */
 427   pn_Cmp d_cmp, M_cmp;
 428
 429   tarval *bits_minus_1, *two_bits_1;
 430
 431   struct ms mag;
 432
 433   tarval_int_overflow_mode_t rem = tarval_get_integer_overflow_mode();
 434
 435   /* we need overflow mode to work correctly */
 436   tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
 437
 438   /* 2^(bits-1) */
 439   bits_minus_1 = new_tarval_from_long(bits - 1, u_mode);
 440   two_bits_1   = SHL(get_mode_one(u_mode), bits_minus_1);
 441
 442   ad  = CNV(ABS(d), u_mode);
 443   t   = ADD(two_bits_1, SHR(CNV(d, u_mode), bits_minus_1));
 444   anc = SUB(SUB(t, ONE(u_mode)), MOD(t, ad));   /* Absolute value of nc */
 445   p   = bits - 1;                               /* Init: p */
 446   q1  = DIV(two_bits_1, anc);                   /* Init: q1 = 2^p/|nc| */
 447   r1  = SUB(two_bits_1, MUL(q1, anc));          /* Init: r1 = rem(2^p, |nc|) */
 448   q2  = DIV(two_bits_1, ad);                    /* Init: q2 = 2^p/|d| */
 449   r2  = SUB(two_bits_1, MUL(q2, ad));           /* Init: r2 = rem(2^p, |d|) */
 450
 451   do {
 452     ++p;
 453     q1 = ADD(q1, q1);                           /* Update q1 = 2^p/|nc| */
 454     r1 = ADD(r1, r1);                           /* Update r1 = rem(2^p, |nc|) */
 455
 456     if (CMP(r1, anc) & pn_Cmp_Ge) {
 457       q1 = ADD(q1, ONE(u_mode));
 458       r1 = SUB(r1, anc);
 459     }
 460
 461     q2 = ADD(q2, q2);                           /* Update q2 = 2^p/|d| */
 462     r2 = ADD(r2, r2);                           /* Update r2 = rem(2^p, |d|) */
 463
 464     if (CMP(r2, ad) & pn_Cmp_Ge) {
 465       q2 = ADD(q2, ONE(u_mode));
 466       r2 = SUB(r2, ad);
 467     }
 468
 469     delta = SUB(ad, r2);
 470   } while (CMP(q1, delta) & pn_Cmp_Lt || (CMP(q1, delta) & pn_Cmp_Eq && CMP(r1, ZERO(u_mode)) & pn_Cmp_Eq));
 471
 472   d_cmp = CMP(d, ZERO(mode));
 473
 474   if (d_cmp & pn_Cmp_Ge)
 475     mag.M = ADD(CNV(q2, mode), ONE(mode));
 476   else
 477     mag.M = SUB(ZERO(mode), ADD(CNV(q2, mode), ONE(mode)));
 478
 479   M_cmp = CMP(mag.M, ZERO(mode));
 480
 481   mag.s = p - bits;
 482
 483   /* need an add if d > 0 && M < 0 */
 484   mag.need_add = d_cmp & pn_Cmp_Gt && M_cmp & pn_Cmp_Lt;
 485
 486   /* need a sub if d < 0 && M > 0 */
 487   mag.need_sub = d_cmp & pn_Cmp_Lt && M_cmp & pn_Cmp_Gt;
 488
 489   tarval_set_integer_overflow_mode(rem);
 490
 491   return mag;
 492 }
 493
 494 /** The result of the magicu() function. */
 495 struct mu {
 496   tarval *M;        /**< magic add constant */
 497   int s;            /**< shift amount */
 498   int need_add;     /**< add indicator */
 499 };
 500
 501 /**
 502  * Unsigned division by constant d: calculate the Magic multiplier M and the shift amount s
 503  *
 504  * see Hacker's Delight: 10-10 Integer Division by Constants: Incorporation into a Compiler (Unsigned)
 505  */
 506 static struct mu magicu(tarval *d)
 507 {
 508   ir_mode *mode   = get_tarval_mode(d);
 509   int bits        = get_mode_size_bits(mode);
 510   int p;
 511   tarval *nc, *delta, *q1, *r1, *q2, *r2;
 512   tarval *bits_minus_1, *two_bits_1, *seven_ff;
 513
 514   struct mu magu;
 515
 516   tarval_int_overflow_mode_t rem = tarval_get_integer_overflow_mode();
 517
 518   /* we need overflow mode to work correctly */
 519   tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
 520
 521   bits_minus_1 = new_tarval_from_long(bits - 1, mode);
 522   two_bits_1   = SHL(get_mode_one(mode), bits_minus_1);
 523   seven_ff     = SUB(two_bits_1, ONE(mode));
 524
 525   magu.need_add = 0;                            /* initialize the add indicator */
 526   nc = SUB(NEG(ONE(mode)), MOD(NEG(d), d));
 527   p  = bits - 1;                                /* Init: p */
 528   q1 = DIV(two_bits_1, nc);                     /* Init: q1 = 2^p/nc */
 529   r1 = SUB(two_bits_1, MUL(q1, nc));            /* Init: r1 = rem(2^p, nc) */
 530   q2 = DIV(seven_ff, d);                        /* Init: q2 = (2^p - 1)/d */
 531   r2 = SUB(seven_ff, MUL(q2, d));               /* Init: r2 = rem(2^p - 1, d) */
 532
 533   do {
 534     ++p;
 535     if (CMP(r1, SUB(nc, r1)) & pn_Cmp_Ge) {
 536       q1 = ADD(ADD(q1, q1), ONE(mode));
 537       r1 = SUB(ADD(r1, r1), nc);
 538     }
 539     else {
 540       q1 = ADD(q1, q1);
 541       r1 = ADD(r1, r1);
 542     }
 543
 544     if (CMP(ADD(r2, ONE(mode)), SUB(d, r2)) & pn_Cmp_Ge) {
 545       if (CMP(q2, seven_ff) & pn_Cmp_Ge)
 546         magu.need_add = 1;
 547
 548       q2 = ADD(ADD(q2, q2), ONE(mode));
 549       r2 = SUB(ADD(ADD(r2, r2), ONE(mode)), d);
 550     }
 551     else {
 552       if (CMP(q2, two_bits_1) & pn_Cmp_Ge)
 553         magu.need_add = 1;
 554
 555       q2 = ADD(q2, q2);
 556       r2 = ADD(ADD(r2, r2), ONE(mode));
 557     }
 558     delta = SUB(SUB(d, ONE(mode)), r2);
 559   } while (p < 2*bits &&
 560           (CMP(q1, delta) & pn_Cmp_Lt || (CMP(q1, delta) & pn_Cmp_Eq && CMP(r1, ZERO(mode)) & pn_Cmp_Eq)));
 561
 562   magu.M = ADD(q2, ONE(mode));                       /* Magic number */
 563   magu.s = p - bits;                                 /* and shift amount */
 564
 565   tarval_set_integer_overflow_mode(rem);
 566
 567   return magu;
 568 }
 569
 570 /**
 571  * Build the Mulh replacement code for n / tv.
 572  *
 573  * Note that 'div' might be a mod or DivMod operation as well
 574  */
 575 static ir_node *replace_div_by_mulh(ir_node *div, tarval *tv)
 576 {
 577   dbg_info *dbg  = get_irn_dbg_info(div);
 578   ir_node *n     = get_binop_left(div);
 579   ir_node *block = get_irn_n(div, -1);
 580   ir_mode *mode  = get_irn_mode(n);
 581   int bits       = get_mode_size_bits(mode);
 582   ir_node *q, *t, *c;
 583
 584   /* Beware: do not transform bad code */
 585   if (is_Bad(n) || is_Bad(block))
 586     return div;
 587
 588   if (mode_is_signed(mode)) {
 589     struct ms mag = magic(tv);
 590
 591     /* generate the Mulh instruction */
 592     c = new_r_Const(current_ir_graph, block, mode, mag.M);
 593     q = new_rd_Mulh(dbg, current_ir_graph, block, n, c, mode);
 594
 595     /* do we need an Add or Sub */
 596     if (mag.need_add)
 597       q = new_rd_Add(dbg, current_ir_graph, block, q, n, mode);
 598     else if (mag.need_sub)
 599       q = new_rd_Sub(dbg, current_ir_graph, block, q, n, mode);
 600
 601     /* Do we need the shift */
 602     if (mag.s > 0) {
 603       c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s);
 604       q    = new_rd_Shrs(dbg, current_ir_graph, block, q, c, mode);
 605     }
 606
 607     /* final */
 608     c = new_r_Const_long(current_ir_graph, block, mode_Iu, bits-1);
 609     t = new_rd_Shr(dbg, current_ir_graph, block, q, c, mode);
 610
 611     q = new_rd_Add(dbg, current_ir_graph, block, q, t, mode);
 612   }
 613   else {
 614     struct mu mag = magicu(tv);
 615     ir_node *c;
 616
 617     /* generate the Mulh instruction */
 618     c = new_r_Const(current_ir_graph, block, mode, mag.M);
 619     q    = new_rd_Mulh(dbg, current_ir_graph, block, n, c, mode);
 620
 621     if (mag.need_add) {
 622       if (mag.s > 0) {
 623         /* use the GM scheme */
 624         t = new_rd_Sub(dbg, current_ir_graph, block, n, q, mode);
 625
 626         c = new_r_Const(current_ir_graph, block, mode_Iu, get_mode_one(mode_Iu));
 627         t = new_rd_Shr(dbg, current_ir_graph, block, t, c, mode);
 628
 629         t = new_rd_Add(dbg, current_ir_graph, block, t, q, mode);
 630
 631         c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s-1);
 632         q = new_rd_Shr(dbg, current_ir_graph, block, t, c, mode);
 633       }
 634       else {
 635         /* use the default scheme */
 636         q = new_rd_Add(dbg, current_ir_graph, block, q, n, mode);
 637       }
 638     }
 639     else if (mag.s > 0) { /* default scheme, shift needed */
 640       c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s);
 641       q = new_rd_Shr(dbg, current_ir_graph, block, q, c, mode);
 642     }
 643   }
 644   return q;
 645 }
 646
 647 /* Replace Divs with Shifts and Add/Subs and Mulh. */
 648 ir_node *arch_dep_replace_div_by_const(ir_node *irn)
 649 {
 650   ir_node *res  = irn;
 651
 652   /* If the architecture dependent optimizations were not initialized
 653      or this optimization was not enabled. */
 654   if (params == NULL || (opts & arch_dep_div_by_const) == 0)
 655     return irn;
 656
 657   if (get_irn_opcode(irn) == iro_Div) {
 658     ir_node *c = get_Div_right(irn);
 659     ir_node *block, *left;
 660     ir_mode *mode;
 661     tarval *tv, *ntv;
 662     dbg_info *dbg;
 663     int n, bits;
 664     int k, n_flag;
 665
 666     if (get_irn_op(c) != op_Const)
 667       return irn;
 668
 669     tv = get_Const_tarval(c);
 670
 671     /* check for division by zero */
 672     if (classify_tarval(tv) == TV_CLASSIFY_NULL)
 673       return irn;
 674
 675     left  = get_Div_left(irn);
 676     mode  = get_irn_mode(left);
 677     block = get_irn_n(irn, -1);
 678     dbg   = get_irn_dbg_info(irn);
 679
 680     bits = get_mode_size_bits(mode);
 681     n    = (bits + 7) / 8;
 682
 683     k = -1;
 684     if (mode_is_signed(mode)) {
 685       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 686       ntv = tarval_neg(tv);
 687       n_flag = 1;
 688       k = tv_ld2(ntv, n);
 689     }
 690
 691     if (k < 0) {
 692       n_flag = 0;
 693       k = tv_ld2(tv, n);
 694     }
 695
 696     if (k >= 0) { /* division by 2^k or -2^k */
 697       if (mode_is_signed(mode)) {
 698         ir_node *k_node;
 699         ir_node *curr = left;
 700
 701         if (k != 1) {
 702           k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
 703           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 704         }
 705
 706         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
 707         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 708
 709         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 710
 711         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 712         res    = new_rd_Shrs(dbg, current_ir_graph, block, curr, k_node, mode);
 713
 714         if (n_flag) { /* negate the result */
 715           ir_node *k_node;
 716
 717           k_node = new_r_Const(current_ir_graph, block, mode, get_mode_null(mode));
 718           res = new_rd_Sub(dbg, current_ir_graph, block, k_node, res, mode);
 719         }
 720       }
 721       else {      /* unsigned case */
 722         ir_node *k_node;
 723
 724         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 725         res    = new_rd_Shr(dbg, current_ir_graph, block, left, k_node, mode);
 726       }
 727     }
 728     else {
 729       /* other constant */
 730       if (allow_Mulh(mode))
 731         res = replace_div_by_mulh(irn, tv);
 732     }
 733   }
 734
 735   if (res != irn)
 736     hook_arch_dep_replace_division_by_const(irn);
 737
 738   return res;
 739 }
 740
 741 /* Replace Mods with Shifts and Add/Subs and Mulh. */
 742 ir_node *arch_dep_replace_mod_by_const(ir_node *irn)
 743 {
 744   ir_node *res  = irn;
 745
 746   /* If the architecture dependent optimizations were not initialized
 747      or this optimization was not enabled. */
 748   if (params == NULL || (opts & arch_dep_mod_by_const) == 0)
 749     return irn;
 750
 751   if (get_irn_opcode(irn) == iro_Mod) {
 752     ir_node *c = get_Mod_right(irn);
 753     ir_node *block, *left;
 754     ir_mode *mode;
 755     tarval *tv, *ntv;
 756     dbg_info *dbg;
 757     int n, bits;
 758     int k;
 759
 760     if (get_irn_op(c) != op_Const)
 761       return irn;
 762
 763     tv = get_Const_tarval(c);
 764
 765     /* check for division by zero */
 766     if (classify_tarval(tv) == TV_CLASSIFY_NULL)
 767       return irn;
 768
 769     left  = get_Mod_left(irn);
 770     mode  = get_irn_mode(left);
 771     block = get_irn_n(irn, -1);
 772     dbg   = get_irn_dbg_info(irn);
 773     bits = get_mode_size_bits(mode);
 774     n    = (bits + 7) / 8;
 775
 776     k = -1;
 777     if (mode_is_signed(mode)) {
 778       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 779       ntv = tarval_neg(tv);
 780       k = tv_ld2(ntv, n);
 781     }
 782
 783     if (k < 0) {
 784       k = tv_ld2(tv, n);
 785     }
 786
 787     if (k >= 0) {
 788       /* division by 2^k or -2^k:
 789        * we use "modulus" here, so x % y == x % -y that's why is no difference between the case 2^k and -2^k
 790        */
 791       if (mode_is_signed(mode)) {
 792         ir_node *k_node;
 793         ir_node *curr = left;
 794
 795         if (k != 1) {
 796           k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
 797           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 798         }
 799
 800         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
 801         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 802
 803         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 804
 805         k_node = new_r_Const_long(current_ir_graph, block, mode, (-1) << k);
 806         curr   = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
 807
 808         res    = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
 809       }
 810       else {      /* unsigned case */
 811         ir_node *k_node;
 812
 813         k_node = new_r_Const_long(current_ir_graph, block, mode, (1 << k) - 1);
 814         res    = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
 815       }
 816     }
 817     else {
 818       /* other constant */
 819       if (allow_Mulh(mode)) {
 820         res = replace_div_by_mulh(irn, tv);
 821
 822         res = new_rd_Mul(dbg, current_ir_graph, block, res, c, mode);
 823
 824         /* res = arch_dep_mul_to_shift(res); */
 825
 826         res = new_rd_Sub(dbg, current_ir_graph, block, left, res, mode);
 827       }
 828     }
 829   }
 830
 831   if (res != irn)
 832     hook_arch_dep_replace_division_by_const(irn);
 833
 834   return res;
 835 }
 836
 837 /* Replace DivMods with Shifts and Add/Subs and Mulh. */
 838 void arch_dep_replace_divmod_by_const(ir_node **div, ir_node **mod, ir_node *irn)
 839 {
 840   *div = *mod = NULL;
 841
 842   /* If the architecture dependent optimizations were not initialized
 843      or this optimization was not enabled. */
 844   if (params == NULL ||
 845       ((opts & (arch_dep_div_by_const|arch_dep_mod_by_const)) != (arch_dep_div_by_const|arch_dep_mod_by_const)))
 846     return;
 847
 848   if (get_irn_opcode(irn) == iro_DivMod) {
 849     ir_node *c = get_DivMod_right(irn);
 850     ir_node *block, *left;
 851     ir_mode *mode;
 852     tarval *tv, *ntv;
 853     dbg_info *dbg;
 854     int n, bits;
 855     int k, n_flag;
 856
 857     if (get_irn_op(c) != op_Const)
 858       return;
 859
 860     tv = get_Const_tarval(c);
 861
 862     /* check for division by zero */
 863     if (classify_tarval(tv) == TV_CLASSIFY_NULL)
 864       return;
 865
 866     left  = get_DivMod_left(irn);
 867     mode  = get_irn_mode(left);
 868     block = get_irn_n(irn, -1);
 869     dbg   = get_irn_dbg_info(irn);
 870
 871     bits = get_mode_size_bits(mode);
 872     n    = (bits + 7) / 8;
 873
 874     k = -1;
 875     if (mode_is_signed(mode)) {
 876       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 877       ntv = tarval_neg(tv);
 878       n_flag = 1;
 879       k = tv_ld2(ntv, n);
 880     }
 881
 882     if (k < 0) {
 883       n_flag = 0;
 884       k = tv_ld2(tv, n);
 885     }
 886
 887     if (k >= 0) { /* division by 2^k or -2^k */
 888       if (mode_is_signed(mode)) {
 889         ir_node *k_node, *c_k;
 890         ir_node *curr = left;
 891
 892         if (k != 1) {
 893           k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
 894           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 895         }
 896
 897         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
 898         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 899
 900         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 901
 902         c_k    = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 903
 904         *div   = new_rd_Shrs(dbg, current_ir_graph, block, curr, c_k, mode);
 905
 906         if (n_flag) { /* negate the div result */
 907           ir_node *k_node;
 908
 909           k_node = new_r_Const(current_ir_graph, block, mode, get_mode_null(mode));
 910           *div = new_rd_Sub(dbg, current_ir_graph, block, k_node, *div, mode);
 911         }
 912
 913         k_node = new_r_Const_long(current_ir_graph, block, mode, (-1) << k);
 914         curr   = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
 915
 916         *mod   = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
 917       }
 918       else {      /* unsigned case */
 919         ir_node *k_node;
 920
 921         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 922         *div   = new_rd_Shr(dbg, current_ir_graph, block, left, k_node, mode);
 923
 924         k_node = new_r_Const_long(current_ir_graph, block, mode, (1 << k) - 1);
 925         *mod   = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
 926       }
 927     }
 928     else {
 929       /* other constant */
 930       if (allow_Mulh(mode)) {
 931         ir_node *t;
 932
 933         *div = replace_div_by_mulh(irn, tv);
 934
 935         t    = new_rd_Mul(dbg, current_ir_graph, block, *div, c, mode);
 936
 937         /* t = arch_dep_mul_to_shift(t); */
 938
 939         *mod = new_rd_Sub(dbg, current_ir_graph, block, left, t, mode);
 940       }
 941     }
 942   }
 943
 944   if (*div)
 945     hook_arch_dep_replace_division_by_const(irn);
 946 }
 947
 948
 949 static const arch_dep_params_t default_params = {
 950   1,  /* also use subs */
 951   4,  /* maximum shifts */
 952   31, /* maximum shift amount */
 953
 954   0,  /* allow Mulhs */
 955   0,  /* allow Mulus */
 956   32  /* Mulh allowed up to 32 bit */
 957 };
 958
 959 /* A default parameter factory for testing purposes. */
 960 const arch_dep_params_t *arch_dep_default_factory(void) {
 961   return &default_params;
 962 }