nsz Git - libfirm/blob - ir/ir/irarch.c

   1 /*
   2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
   3  *
   4  * This file is part of libFirm.
   5  *
   6  * This file may be distributed and/or modified under the terms of the
   7  * GNU General Public License version 2 as published by the Free Software
   8  * Foundation and appearing in the file LICENSE.GPL included in the
   9  * packaging of this file.
  10  *
  11  * Licensees holding valid libFirm Professional Edition licenses may use
  12  * this file in accordance with the libFirm Commercial License.
  13  * Agreement provided with the Software.
  14  *
  15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
  16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17  * PURPOSE.
  18  */
  19
  20 /**
  21  * @file irarch.c
  22  * @date 28.9.2004
  23  * @author Sebastian Hack, Michael Beck
  24  * @brief Machine dependent Firm optimizations.
  25  *
  26  * $Id$
  27  */
  28 #ifdef HAVE_CONFIG_H
  29 # include "config.h"
  30 #endif
  31
  32 #ifdef HAVE_STDLIB_H
  33 # include <stdlib.h>
  34 #endif
  35
  36 #include <assert.h>
  37
  38 #include "irnode_t.h"
  39 #include "irgraph_t.h"
  40 #include "irmode_t.h"
  41 #include "iropt_t.h"
  42 #include "ircons_t.h"
  43 #include "irgmod.h"
  44 #include "irvrfy.h"
  45 #include "tv_t.h"
  46 #include "dbginfo_t.h"
  47 #include "iropt_dbg.h"
  48 #include "irflag_t.h"
  49 #include "irhooks.h"
  50 #include "ircons.h"
  51 #include "irarch.h"
  52 #include "irreflect.h"
  53
  54 #undef DEB
  55
  56 #define MAX_BITSTR 64
  57
  58 /* when we need verifying */
  59 #ifdef NDEBUG
  60 # define IRN_VRFY_IRG(res, irg)
  61 #else
  62 # define IRN_VRFY_IRG(res, irg)  irn_vrfy_irg(res, irg)
  63 #endif
  64
  65 /** The params got from the factory in arch_dep_init(...). */
  66 static const arch_dep_params_t *params = NULL;
  67
  68 /** The bit mask, which optimizations to apply. */
  69 static arch_dep_opts_t opts;
  70
  71 /* we need this new pseudo op */
  72 static ir_op *op_Mulh = NULL;
  73
  74 /**
  75  * construct a Mulh: Mulh(a,b) = (a * b) >> w, w is the with in bits of a, b
  76  */
  77 static ir_node *
  78 new_rd_Mulh (dbg_info *db, ir_graph *irg, ir_node *block,
  79        ir_node *op1, ir_node *op2, ir_mode *mode)
  80 {
  81   ir_node *in[2];
  82   ir_node *res;
  83
  84   in[0] = op1;
  85   in[1] = op2;
  86   res = new_ir_node(db, irg, block, op_Mulh, mode, 2, in);
  87   res = optimize_node(res);
  88   IRN_VRFY_IRG(res, irg);
  89   return res;
  90 }
  91
  92 ir_op *get_op_Mulh(void)  { return op_Mulh; }
  93
  94 void arch_dep_init(arch_dep_params_factory_t factory)
  95 {
  96   opts = arch_dep_none;
  97
  98   if (factory != NULL)
  99     params = factory();
 100
 101   if (! op_Mulh) {
 102     rflct_sig_t *sig;
 103     int mulh_opc = get_next_ir_opcode();
 104
 105     /* create the Mulh operation */
 106     op_Mulh = new_ir_op(mulh_opc, "Mulh",  op_pin_state_floats, irop_flag_commutative, oparity_binary, 0, 0, NULL);
 107     sig = rflct_signature_allocate(1, 3);
 108     rflct_signature_set_arg(sig, 0, 0, "Res", RFLCT_MC(Int), 0, 0);
 109     rflct_signature_set_arg(sig, 1, 0, "Block", RFLCT_MC(BB), 0, 0);
 110     rflct_signature_set_arg(sig, 1, 1, "Op 0", RFLCT_MC(Int), 0, 0);
 111     rflct_signature_set_arg(sig, 1, 2, "Op 1", RFLCT_MC(Int), 0, 0);
 112
 113     rflct_new_opcode(mulh_opc, "Mulh", 0);
 114     rflct_opcode_add_signature(mulh_opc, sig);
 115   }
 116 }
 117
 118 void arch_dep_set_opts(arch_dep_opts_t the_opts) {
 119   opts = the_opts;
 120 }
 121
 122 /** check, whether a mode allows a Mulh instruction. */
 123 static int allow_Mulh(ir_mode *mode)
 124 {
 125   if (get_mode_size_bits(mode) > params->max_bits_for_mulh)
 126     return 0;
 127   return (mode_is_signed(mode) && params->allow_mulhs) || (!mode_is_signed(mode) && params->allow_mulhu);
 128 }
 129
 130 /* Replace Muls with Shifts and Add/Subs. */
 131 ir_node *arch_dep_replace_mul_with_shifts(ir_node *irn)
 132 {
 133   ir_node *res = irn;
 134   ir_mode *mode = get_irn_mode(irn);
 135
 136   /* If the architecture dependent optimizations were not initialized
 137      or this optimization was not enabled. */
 138   if (params == NULL || (opts & arch_dep_mul_to_shift) == 0)
 139     return irn;
 140
 141   if (get_irn_op(irn) == op_Mul && mode_is_int(mode)) {
 142     ir_node *block   = get_irn_n(irn, -1);
 143     ir_node *left    = get_binop_left(irn);
 144     ir_node *right   = get_binop_right(irn);
 145     tarval *tv       = NULL;
 146     ir_node *operand = NULL;
 147
 148     /* Look, if one operand is a constant. */
 149     if (get_irn_opcode(left) == iro_Const) {
 150       tv = get_Const_tarval(left);
 151       operand = right;
 152     } else if(get_irn_opcode(right) == iro_Const) {
 153       tv = get_Const_tarval(right);
 154       operand = left;
 155     }
 156
 157     if (tv != NULL) {
 158       int maximum_shifts = params->maximum_shifts;
 159       int also_use_subs = params->also_use_subs;
 160       int highest_shift_amount = params->highest_shift_amount;
 161
 162       char *bitstr = get_tarval_bitpattern(tv);
 163       char *p;
 164       int i, last = 0;
 165       int counter = 0;
 166       int curr_bit;
 167       int compr_len = 0;
 168       char compr[MAX_BITSTR];
 169
 170       int singleton;
 171       int end_of_group;
 172       int shift_with_sub[MAX_BITSTR] = { 0 };
 173       int shift_without_sub[MAX_BITSTR] = { 0 };
 174       int shift_with_sub_pos = 0;
 175       int shift_without_sub_pos = 0;
 176
 177 #if DEB
 178       {
 179         long val = get_tarval_long(tv);
 180         fprintf(stderr, "Found mul with %ld(%lx) = ", val, val);
 181         for(p = bitstr; *p != '\0'; p++)
 182           printf("%c", *p);
 183         printf("\n");
 184       }
 185 #endif
 186
 187       for(p = bitstr; *p != '\0'; p++) {
 188         int bit = *p != '0';
 189
 190         if (bit != last) {
 191           /* The last was 1 we are now at 0 OR
 192            * The last was 0 and we are now at 1 */
 193           compr[compr_len++] = counter;
 194           counter = 1;
 195         }
 196         else
 197           counter++;
 198
 199         last = bit;
 200       }
 201       compr[compr_len++] = counter;
 202
 203
 204 #ifdef DEB
 205       {
 206         const char *prefix = "";
 207         for(i = 0; i < compr_len; i++, prefix = ",")
 208           fprintf(stderr, "%s%d", prefix, compr[i]);
 209         fprintf("\n");
 210       }
 211 #endif
 212
 213       /* Go over all recorded one groups. */
 214       curr_bit = compr[0];
 215
 216       for(i = 1; i < compr_len; i = end_of_group + 2) {
 217         int j, zeros_in_group, ones_in_group;
 218
 219         ones_in_group = compr[i];
 220         zeros_in_group = 0;
 221
 222         /* Scan for singular 0s in a sequence. */
 223         for(j = i + 1; j < compr_len && compr[j] == 1; j += 2) {
 224           zeros_in_group += 1;
 225           ones_in_group += (j + 1 < compr_len ? compr[j + 1] : 0);
 226         }
 227         end_of_group = j - 1;
 228
 229         if(zeros_in_group >= ones_in_group - 1)
 230           end_of_group = i;
 231
 232 #ifdef DEB
 233         fprintf(stderr, "  i:%d, eg:%d\n", i, end_of_group);
 234 #endif
 235
 236         singleton = compr[i] == 1 && i == end_of_group;
 237         for(j = i; j <= end_of_group; j += 2) {
 238           int curr_ones = compr[j];
 239           int biased_curr_bit = curr_bit + 1;
 240           int k;
 241
 242 #ifdef DEB
 243           fprintf(stderr, "    j:%d, ones:%d\n", j, curr_ones);
 244 #endif
 245
 246           /* If this ones group is a singleton group (it has no
 247              singleton zeros inside. */
 248           if(singleton)
 249             shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
 250           else if(j == i)
 251             shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
 252
 253           for(k = 0; k < curr_ones; k++)
 254             shift_without_sub[shift_without_sub_pos++] = biased_curr_bit + k;
 255
 256           curr_bit += curr_ones;
 257           biased_curr_bit = curr_bit + 1;
 258
 259           if(!singleton && j == end_of_group)
 260             shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
 261           else if(j != end_of_group)
 262             shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
 263
 264           curr_bit += compr[j + 1];
 265         }
 266
 267       }
 268
 269       {
 270         int *shifts = shift_with_sub;
 271         int n = shift_with_sub_pos;
 272         int highest_shift_wide = 0;
 273         int highest_shift_seq = 0;
 274         int last_shift = 0;
 275
 276         /* If we may not use subs, or we can achive the same with adds,
 277            prefer adds. */
 278         if(!also_use_subs || shift_with_sub_pos >= shift_without_sub_pos) {
 279           shifts = shift_without_sub;
 280           n = shift_without_sub_pos;
 281         }
 282
 283         /* If the number of needed shifts exceeds the given maximum,
 284            use the Mul and exit. */
 285         if(n > maximum_shifts) {
 286 #ifdef DEB
 287           fprintf(stderr, "Only allowed %d shifts, but %d are needed\n",
 288               maximum_shifts, n);
 289 #endif
 290           return irn;
 291         }
 292
 293         /* Compute the highest shift needed for both, the
 294            sequential and wide representations. */
 295         for(i = 0; i < n; i++) {
 296           int curr = abs(shifts[i]);
 297           int curr_seq = curr - last;
 298
 299           highest_shift_wide = curr > highest_shift_wide ? curr
 300             : highest_shift_wide;
 301           highest_shift_seq = curr_seq > highest_shift_seq ? curr_seq
 302             : highest_shift_seq;
 303
 304           last_shift = curr;
 305         }
 306
 307         /* If the highest shift amount is greater than the given limit,
 308            give back the Mul */
 309         if(highest_shift_seq > highest_shift_amount) {
 310 #ifdef DEB
 311           fprintf(stderr, "Shift argument %d exceeds maximum %d\n",
 312               highest_shift_seq, highest_shift_amount);
 313 #endif
 314           return irn;
 315         }
 316
 317         /* If we have subs, we cannot do sequential. */
 318         if(1 /* also_use_subs */) {
 319           if(n > 0) {
 320             ir_node *curr = NULL;
 321
 322             i = n - 1;
 323
 324             do {
 325               int curr_shift = shifts[i];
 326               int sub = curr_shift < 0;
 327               int amount = abs(curr_shift) - 1;
 328               ir_node *aux = operand;
 329
 330               assert(amount >= 0 && "What is a negative shift??");
 331
 332               if (amount != 0) {
 333                 ir_node *cnst = new_r_Const_long(current_ir_graph, block, mode_Iu, amount);
 334                 aux = new_r_Shl(current_ir_graph, block, operand, cnst, mode);
 335               }
 336
 337               if (curr) {
 338                 if (sub)
 339                   curr = new_r_Sub(current_ir_graph, block, curr, aux, mode);
 340                 else
 341                   curr = new_r_Add(current_ir_graph, block, curr, aux, mode);
 342               } else
 343                 curr = aux;
 344
 345             } while(--i >= 0);
 346
 347             res = curr;
 348           }
 349         }
 350
 351 #ifdef DEB
 352         {
 353           const char *prefix = "";
 354           for (i = 0; i < n; ++i) {
 355             fprintf(stderr, "%s%d", prefix, shifts[i]);
 356             prefix = ", ";
 357           }
 358           fprintf(stderr, "\n");
 359         }
 360 #endif
 361
 362       }
 363
 364       if(bitstr)
 365         free(bitstr);
 366     }
 367
 368   }
 369
 370   if (res != irn)
 371     hook_arch_dep_replace_mul_with_shifts(irn);
 372
 373   return res;
 374 }
 375
 376 /**
 377  * calculated the ld2 of a tarval if tarval is 2^n, else returns -1.
 378  */
 379 static int tv_ld2(tarval *tv, int bits)
 380 {
 381   int i, k = 0, num;
 382
 383   for (num = i = 0; i < bits; ++i) {
 384     unsigned char v = get_tarval_sub_bits(tv, i);
 385
 386     if (v) {
 387       int j;
 388
 389       for (j = 0; j < 8; ++j)
 390         if ((1 << j) & v) {
 391           ++num;
 392           k = 8 * i + j;
 393         }
 394     }
 395   }
 396   if (num == 1)
 397     return k;
 398   return -1;
 399 }
 400
 401
 402 /* for shorter lines */
 403 #define ABS(a)    tarval_abs(a)
 404 #define NEG(a)    tarval_neg(a)
 405 #define NOT(a)    tarval_not(a)
 406 #define SHL(a, b) tarval_shl(a, b)
 407 #define SHR(a, b) tarval_shr(a, b)
 408 #define ADD(a, b) tarval_add(a, b)
 409 #define SUB(a, b) tarval_sub(a, b)
 410 #define MUL(a, b) tarval_mul(a, b)
 411 #define DIV(a, b) tarval_div(a, b)
 412 #define MOD(a, b) tarval_mod(a, b)
 413 #define CMP(a, b) tarval_cmp(a, b)
 414 #define CNV(a, m) tarval_convert_to(a, m)
 415 #define ONE(m)    get_mode_one(m)
 416 #define ZERO(m)   get_mode_null(m)
 417
 418 /** The result of a the magic() function. */
 419 struct ms {
 420   tarval *M;        /**< magic number */
 421   int s;            /**< shift amount */
 422   int need_add;     /**< an additional add is needed */
 423   int need_sub;     /**< an additional sub is needed */
 424 };
 425
 426 /**
 427  * Signed division by constant d: calculate the Magic multiplier M and the shift amount s
 428  *
 429  * see Hacker's Delight: 10-6 Integer Division by Constants: Incorporation into a Compiler
 430  */
 431 static struct ms magic(tarval *d)
 432 {
 433   ir_mode *mode   = get_tarval_mode(d);
 434   ir_mode *u_mode = find_unsigned_mode(mode);
 435   int bits        = get_mode_size_bits(u_mode);
 436   int p;
 437   tarval *ad, *anc, *delta, *q1, *r1, *q2, *r2, *t;     /* unsigned */
 438   pn_Cmp d_cmp, M_cmp;
 439
 440   tarval *bits_minus_1, *two_bits_1;
 441
 442   struct ms mag;
 443
 444   tarval_int_overflow_mode_t rem = tarval_get_integer_overflow_mode();
 445
 446   /* we need overflow mode to work correctly */
 447   tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
 448
 449   /* 2^(bits-1) */
 450   bits_minus_1 = new_tarval_from_long(bits - 1, u_mode);
 451   two_bits_1   = SHL(get_mode_one(u_mode), bits_minus_1);
 452
 453   ad  = CNV(ABS(d), u_mode);
 454   t   = ADD(two_bits_1, SHR(CNV(d, u_mode), bits_minus_1));
 455   anc = SUB(SUB(t, ONE(u_mode)), MOD(t, ad));   /* Absolute value of nc */
 456   p   = bits - 1;                               /* Init: p */
 457   q1  = DIV(two_bits_1, anc);                   /* Init: q1 = 2^p/|nc| */
 458   r1  = SUB(two_bits_1, MUL(q1, anc));          /* Init: r1 = rem(2^p, |nc|) */
 459   q2  = DIV(two_bits_1, ad);                    /* Init: q2 = 2^p/|d| */
 460   r2  = SUB(two_bits_1, MUL(q2, ad));           /* Init: r2 = rem(2^p, |d|) */
 461
 462   do {
 463     ++p;
 464     q1 = ADD(q1, q1);                           /* Update q1 = 2^p/|nc| */
 465     r1 = ADD(r1, r1);                           /* Update r1 = rem(2^p, |nc|) */
 466
 467     if (CMP(r1, anc) & pn_Cmp_Ge) {
 468       q1 = ADD(q1, ONE(u_mode));
 469       r1 = SUB(r1, anc);
 470     }
 471
 472     q2 = ADD(q2, q2);                           /* Update q2 = 2^p/|d| */
 473     r2 = ADD(r2, r2);                           /* Update r2 = rem(2^p, |d|) */
 474
 475     if (CMP(r2, ad) & pn_Cmp_Ge) {
 476       q2 = ADD(q2, ONE(u_mode));
 477       r2 = SUB(r2, ad);
 478     }
 479
 480     delta = SUB(ad, r2);
 481   } while (CMP(q1, delta) & pn_Cmp_Lt || (CMP(q1, delta) & pn_Cmp_Eq && CMP(r1, ZERO(u_mode)) & pn_Cmp_Eq));
 482
 483   d_cmp = CMP(d, ZERO(mode));
 484
 485   if (d_cmp & pn_Cmp_Ge)
 486     mag.M = ADD(CNV(q2, mode), ONE(mode));
 487   else
 488     mag.M = SUB(ZERO(mode), ADD(CNV(q2, mode), ONE(mode)));
 489
 490   M_cmp = CMP(mag.M, ZERO(mode));
 491
 492   mag.s = p - bits;
 493
 494   /* need an add if d > 0 && M < 0 */
 495   mag.need_add = d_cmp & pn_Cmp_Gt && M_cmp & pn_Cmp_Lt;
 496
 497   /* need a sub if d < 0 && M > 0 */
 498   mag.need_sub = d_cmp & pn_Cmp_Lt && M_cmp & pn_Cmp_Gt;
 499
 500   tarval_set_integer_overflow_mode(rem);
 501
 502   return mag;
 503 }
 504
 505 /** The result of the magicu() function. */
 506 struct mu {
 507   tarval *M;        /**< magic add constant */
 508   int s;            /**< shift amount */
 509   int need_add;     /**< add indicator */
 510 };
 511
 512 /**
 513  * Unsigned division by constant d: calculate the Magic multiplier M and the shift amount s
 514  *
 515  * see Hacker's Delight: 10-10 Integer Division by Constants: Incorporation into a Compiler (Unsigned)
 516  */
 517 static struct mu magicu(tarval *d)
 518 {
 519   ir_mode *mode   = get_tarval_mode(d);
 520   int bits        = get_mode_size_bits(mode);
 521   int p;
 522   tarval *nc, *delta, *q1, *r1, *q2, *r2;
 523   tarval *bits_minus_1, *two_bits_1, *seven_ff;
 524
 525   struct mu magu;
 526
 527   tarval_int_overflow_mode_t rem = tarval_get_integer_overflow_mode();
 528
 529   /* we need overflow mode to work correctly */
 530   tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
 531
 532   bits_minus_1 = new_tarval_from_long(bits - 1, mode);
 533   two_bits_1   = SHL(get_mode_one(mode), bits_minus_1);
 534   seven_ff     = SUB(two_bits_1, ONE(mode));
 535
 536   magu.need_add = 0;                            /* initialize the add indicator */
 537   nc = SUB(NEG(ONE(mode)), MOD(NEG(d), d));
 538   p  = bits - 1;                                /* Init: p */
 539   q1 = DIV(two_bits_1, nc);                     /* Init: q1 = 2^p/nc */
 540   r1 = SUB(two_bits_1, MUL(q1, nc));            /* Init: r1 = rem(2^p, nc) */
 541   q2 = DIV(seven_ff, d);                        /* Init: q2 = (2^p - 1)/d */
 542   r2 = SUB(seven_ff, MUL(q2, d));               /* Init: r2 = rem(2^p - 1, d) */
 543
 544   do {
 545     ++p;
 546     if (CMP(r1, SUB(nc, r1)) & pn_Cmp_Ge) {
 547       q1 = ADD(ADD(q1, q1), ONE(mode));
 548       r1 = SUB(ADD(r1, r1), nc);
 549     }
 550     else {
 551       q1 = ADD(q1, q1);
 552       r1 = ADD(r1, r1);
 553     }
 554
 555     if (CMP(ADD(r2, ONE(mode)), SUB(d, r2)) & pn_Cmp_Ge) {
 556       if (CMP(q2, seven_ff) & pn_Cmp_Ge)
 557         magu.need_add = 1;
 558
 559       q2 = ADD(ADD(q2, q2), ONE(mode));
 560       r2 = SUB(ADD(ADD(r2, r2), ONE(mode)), d);
 561     }
 562     else {
 563       if (CMP(q2, two_bits_1) & pn_Cmp_Ge)
 564         magu.need_add = 1;
 565
 566       q2 = ADD(q2, q2);
 567       r2 = ADD(ADD(r2, r2), ONE(mode));
 568     }
 569     delta = SUB(SUB(d, ONE(mode)), r2);
 570   } while (p < 2*bits &&
 571           (CMP(q1, delta) & pn_Cmp_Lt || (CMP(q1, delta) & pn_Cmp_Eq && CMP(r1, ZERO(mode)) & pn_Cmp_Eq)));
 572
 573   magu.M = ADD(q2, ONE(mode));                       /* Magic number */
 574   magu.s = p - bits;                                 /* and shift amount */
 575
 576   tarval_set_integer_overflow_mode(rem);
 577
 578   return magu;
 579 }
 580
 581 /**
 582  * Build the Mulh replacement code for n / tv.
 583  *
 584  * Note that 'div' might be a mod or DivMod operation as well
 585  */
 586 static ir_node *replace_div_by_mulh(ir_node *div, tarval *tv)
 587 {
 588   dbg_info *dbg  = get_irn_dbg_info(div);
 589   ir_node *n     = get_binop_left(div);
 590   ir_node *block = get_irn_n(div, -1);
 591   ir_mode *mode  = get_irn_mode(n);
 592   int bits       = get_mode_size_bits(mode);
 593   ir_node *q, *t, *c;
 594
 595   /* Beware: do not transform bad code */
 596   if (is_Bad(n) || is_Bad(block))
 597     return div;
 598
 599   if (mode_is_signed(mode)) {
 600     struct ms mag = magic(tv);
 601
 602     /* generate the Mulh instruction */
 603     c = new_r_Const(current_ir_graph, block, mode, mag.M);
 604     q = new_rd_Mulh(dbg, current_ir_graph, block, n, c, mode);
 605
 606     /* do we need an Add or Sub */
 607     if (mag.need_add)
 608       q = new_rd_Add(dbg, current_ir_graph, block, q, n, mode);
 609     else if (mag.need_sub)
 610       q = new_rd_Sub(dbg, current_ir_graph, block, q, n, mode);
 611
 612     /* Do we need the shift */
 613     if (mag.s > 0) {
 614       c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s);
 615       q    = new_rd_Shrs(dbg, current_ir_graph, block, q, c, mode);
 616     }
 617
 618     /* final */
 619     c = new_r_Const_long(current_ir_graph, block, mode_Iu, bits-1);
 620     t = new_rd_Shr(dbg, current_ir_graph, block, q, c, mode);
 621
 622     q = new_rd_Add(dbg, current_ir_graph, block, q, t, mode);
 623   }
 624   else {
 625     struct mu mag = magicu(tv);
 626     ir_node *c;
 627
 628     /* generate the Mulh instruction */
 629     c = new_r_Const(current_ir_graph, block, mode, mag.M);
 630     q    = new_rd_Mulh(dbg, current_ir_graph, block, n, c, mode);
 631
 632     if (mag.need_add) {
 633       if (mag.s > 0) {
 634         /* use the GM scheme */
 635         t = new_rd_Sub(dbg, current_ir_graph, block, n, q, mode);
 636
 637         c = new_r_Const(current_ir_graph, block, mode_Iu, get_mode_one(mode_Iu));
 638         t = new_rd_Shr(dbg, current_ir_graph, block, t, c, mode);
 639
 640         t = new_rd_Add(dbg, current_ir_graph, block, t, q, mode);
 641
 642         c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s-1);
 643         q = new_rd_Shr(dbg, current_ir_graph, block, t, c, mode);
 644       }
 645       else {
 646         /* use the default scheme */
 647         q = new_rd_Add(dbg, current_ir_graph, block, q, n, mode);
 648       }
 649     }
 650     else if (mag.s > 0) { /* default scheme, shift needed */
 651       c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s);
 652       q = new_rd_Shr(dbg, current_ir_graph, block, q, c, mode);
 653     }
 654   }
 655   return q;
 656 }
 657
 658 /* Replace Divs with Shifts and Add/Subs and Mulh. */
 659 ir_node *arch_dep_replace_div_by_const(ir_node *irn)
 660 {
 661   ir_node *res  = irn;
 662
 663   /* If the architecture dependent optimizations were not initialized
 664      or this optimization was not enabled. */
 665   if (params == NULL || (opts & arch_dep_div_by_const) == 0)
 666     return irn;
 667
 668   if (get_irn_opcode(irn) == iro_Div) {
 669     ir_node *c = get_Div_right(irn);
 670     ir_node *block, *left;
 671     ir_mode *mode;
 672     tarval *tv, *ntv;
 673     dbg_info *dbg;
 674     int n, bits;
 675     int k, n_flag;
 676
 677     if (get_irn_op(c) != op_Const)
 678       return irn;
 679
 680     tv = get_Const_tarval(c);
 681
 682     /* check for division by zero */
 683     if (classify_tarval(tv) == TV_CLASSIFY_NULL)
 684       return irn;
 685
 686     left  = get_Div_left(irn);
 687     mode  = get_irn_mode(left);
 688     block = get_irn_n(irn, -1);
 689     dbg   = get_irn_dbg_info(irn);
 690
 691     bits = get_mode_size_bits(mode);
 692     n    = (bits + 7) / 8;
 693
 694     k = -1;
 695     if (mode_is_signed(mode)) {
 696       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 697       ntv = tarval_neg(tv);
 698       n_flag = 1;
 699       k = tv_ld2(ntv, n);
 700     }
 701
 702     if (k < 0) {
 703       n_flag = 0;
 704       k = tv_ld2(tv, n);
 705     }
 706
 707     if (k >= 0) { /* division by 2^k or -2^k */
 708       if (mode_is_signed(mode)) {
 709         ir_node *k_node;
 710         ir_node *curr = left;
 711
 712         if (k != 1) {
 713           k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
 714           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 715         }
 716
 717         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
 718         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 719
 720         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 721
 722         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 723         res    = new_rd_Shrs(dbg, current_ir_graph, block, curr, k_node, mode);
 724
 725         if (n_flag) { /* negate the result */
 726           ir_node *k_node;
 727
 728           k_node = new_r_Const(current_ir_graph, block, mode, get_mode_null(mode));
 729           res = new_rd_Sub(dbg, current_ir_graph, block, k_node, res, mode);
 730         }
 731       }
 732       else {      /* unsigned case */
 733         ir_node *k_node;
 734
 735         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 736         res    = new_rd_Shr(dbg, current_ir_graph, block, left, k_node, mode);
 737       }
 738     }
 739     else {
 740       /* other constant */
 741       if (allow_Mulh(mode))
 742         res = replace_div_by_mulh(irn, tv);
 743     }
 744   }
 745
 746   if (res != irn)
 747     hook_arch_dep_replace_division_by_const(irn);
 748
 749   return res;
 750 }
 751
 752 /* Replace Mods with Shifts and Add/Subs and Mulh. */
 753 ir_node *arch_dep_replace_mod_by_const(ir_node *irn)
 754 {
 755   ir_node *res  = irn;
 756
 757   /* If the architecture dependent optimizations were not initialized
 758      or this optimization was not enabled. */
 759   if (params == NULL || (opts & arch_dep_mod_by_const) == 0)
 760     return irn;
 761
 762   if (get_irn_opcode(irn) == iro_Mod) {
 763     ir_node *c = get_Mod_right(irn);
 764     ir_node *block, *left;
 765     ir_mode *mode;
 766     tarval *tv, *ntv;
 767     dbg_info *dbg;
 768     int n, bits;
 769     int k;
 770
 771     if (get_irn_op(c) != op_Const)
 772       return irn;
 773
 774     tv = get_Const_tarval(c);
 775
 776     /* check for division by zero */
 777     if (classify_tarval(tv) == TV_CLASSIFY_NULL)
 778       return irn;
 779
 780     left  = get_Mod_left(irn);
 781     mode  = get_irn_mode(left);
 782     block = get_irn_n(irn, -1);
 783     dbg   = get_irn_dbg_info(irn);
 784     bits = get_mode_size_bits(mode);
 785     n    = (bits + 7) / 8;
 786
 787     k = -1;
 788     if (mode_is_signed(mode)) {
 789       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 790       ntv = tarval_neg(tv);
 791       k = tv_ld2(ntv, n);
 792     }
 793
 794     if (k < 0) {
 795       k = tv_ld2(tv, n);
 796     }
 797
 798     if (k >= 0) {
 799       /* division by 2^k or -2^k:
 800        * we use "modulus" here, so x % y == x % -y that's why is no difference between the case 2^k and -2^k
 801        */
 802       if (mode_is_signed(mode)) {
 803         ir_node *k_node;
 804         ir_node *curr = left;
 805
 806         if (k != 1) {
 807           k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
 808           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 809         }
 810
 811         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
 812         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 813
 814         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 815
 816         k_node = new_r_Const_long(current_ir_graph, block, mode, (-1) << k);
 817         curr   = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
 818
 819         res    = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
 820       }
 821       else {      /* unsigned case */
 822         ir_node *k_node;
 823
 824         k_node = new_r_Const_long(current_ir_graph, block, mode, (1 << k) - 1);
 825         res    = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
 826       }
 827     }
 828     else {
 829       /* other constant */
 830       if (allow_Mulh(mode)) {
 831         res = replace_div_by_mulh(irn, tv);
 832
 833         res = new_rd_Mul(dbg, current_ir_graph, block, res, c, mode);
 834
 835         /* res = arch_dep_mul_to_shift(res); */
 836
 837         res = new_rd_Sub(dbg, current_ir_graph, block, left, res, mode);
 838       }
 839     }
 840   }
 841
 842   if (res != irn)
 843     hook_arch_dep_replace_division_by_const(irn);
 844
 845   return res;
 846 }
 847
 848 /* Replace DivMods with Shifts and Add/Subs and Mulh. */
 849 void arch_dep_replace_divmod_by_const(ir_node **div, ir_node **mod, ir_node *irn)
 850 {
 851   *div = *mod = NULL;
 852
 853   /* If the architecture dependent optimizations were not initialized
 854      or this optimization was not enabled. */
 855   if (params == NULL ||
 856       ((opts & (arch_dep_div_by_const|arch_dep_mod_by_const)) != (arch_dep_div_by_const|arch_dep_mod_by_const)))
 857     return;
 858
 859   if (get_irn_opcode(irn) == iro_DivMod) {
 860     ir_node *c = get_DivMod_right(irn);
 861     ir_node *block, *left;
 862     ir_mode *mode;
 863     tarval *tv, *ntv;
 864     dbg_info *dbg;
 865     int n, bits;
 866     int k, n_flag;
 867
 868     if (get_irn_op(c) != op_Const)
 869       return;
 870
 871     tv = get_Const_tarval(c);
 872
 873     /* check for division by zero */
 874     if (classify_tarval(tv) == TV_CLASSIFY_NULL)
 875       return;
 876
 877     left  = get_DivMod_left(irn);
 878     mode  = get_irn_mode(left);
 879     block = get_irn_n(irn, -1);
 880     dbg   = get_irn_dbg_info(irn);
 881
 882     bits = get_mode_size_bits(mode);
 883     n    = (bits + 7) / 8;
 884
 885     k = -1;
 886     if (mode_is_signed(mode)) {
 887       /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 888       ntv = tarval_neg(tv);
 889       n_flag = 1;
 890       k = tv_ld2(ntv, n);
 891     }
 892
 893     if (k < 0) {
 894       n_flag = 0;
 895       k = tv_ld2(tv, n);
 896     }
 897
 898     if (k >= 0) { /* division by 2^k or -2^k */
 899       if (mode_is_signed(mode)) {
 900         ir_node *k_node, *c_k;
 901         ir_node *curr = left;
 902
 903         if (k != 1) {
 904           k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
 905           curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 906         }
 907
 908         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
 909         curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 910
 911         curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 912
 913         c_k    = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 914
 915         *div   = new_rd_Shrs(dbg, current_ir_graph, block, curr, c_k, mode);
 916
 917         if (n_flag) { /* negate the div result */
 918           ir_node *k_node;
 919
 920           k_node = new_r_Const(current_ir_graph, block, mode, get_mode_null(mode));
 921           *div = new_rd_Sub(dbg, current_ir_graph, block, k_node, *div, mode);
 922         }
 923
 924         k_node = new_r_Const_long(current_ir_graph, block, mode, (-1) << k);
 925         curr   = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
 926
 927         *mod   = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
 928       }
 929       else {      /* unsigned case */
 930         ir_node *k_node;
 931
 932         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 933         *div   = new_rd_Shr(dbg, current_ir_graph, block, left, k_node, mode);
 934
 935         k_node = new_r_Const_long(current_ir_graph, block, mode, (1 << k) - 1);
 936         *mod   = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
 937       }
 938     }
 939     else {
 940       /* other constant */
 941       if (allow_Mulh(mode)) {
 942         ir_node *t;
 943
 944         *div = replace_div_by_mulh(irn, tv);
 945
 946         t    = new_rd_Mul(dbg, current_ir_graph, block, *div, c, mode);
 947
 948         /* t = arch_dep_mul_to_shift(t); */
 949
 950         *mod = new_rd_Sub(dbg, current_ir_graph, block, left, t, mode);
 951       }
 952     }
 953   }
 954
 955   if (*div)
 956     hook_arch_dep_replace_division_by_const(irn);
 957 }
 958
 959
 960 static const arch_dep_params_t default_params = {
 961   1,  /* also use subs */
 962   4,  /* maximum shifts */
 963   31, /* maximum shift amount */
 964
 965   0,  /* allow Mulhs */
 966   0,  /* allow Mulus */
 967   32  /* Mulh allowed up to 32 bit */
 968 };
 969
 970 /* A default parameter factory for testing purposes. */
 971 const arch_dep_params_t *arch_dep_default_factory(void) {
 972   return &default_params;
 973 }