nsz Git - libfirm/blob - ir/ir/irarch.c

   1 /*
   2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
   3  *
   4  * This file is part of libFirm.
   5  *
   6  * This file may be distributed and/or modified under the terms of the
   7  * GNU General Public License version 2 as published by the Free Software
   8  * Foundation and appearing in the file LICENSE.GPL included in the
   9  * packaging of this file.
  10  *
  11  * Licensees holding valid libFirm Professional Edition licenses may use
  12  * this file in accordance with the libFirm Commercial License.
  13  * Agreement provided with the Software.
  14  *
  15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
  16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17  * PURPOSE.
  18  */
  19
  20 /**
  21  * @file
  22  * @brief   Machine dependent Firm optimizations.
  23  * @date    28.9.2004
  24  * @author  Sebastian Hack, Michael Beck
  25  * @version $Id$
  26  */
  27 #ifdef HAVE_CONFIG_H
  28 # include "config.h"
  29 #endif
  30
  31 #ifdef HAVE_STDLIB_H
  32 # include <stdlib.h>
  33 #endif
  34
  35 #include <assert.h>
  36
  37 #include "irnode_t.h"
  38 #include "irgraph_t.h"
  39 #include "irmode_t.h"
  40 #include "iropt_t.h"
  41 #include "ircons_t.h"
  42 #include "irgmod.h"
  43 #include "irvrfy.h"
  44 #include "tv_t.h"
  45 #include "dbginfo_t.h"
  46 #include "iropt_dbg.h"
  47 #include "irflag_t.h"
  48 #include "irhooks.h"
  49 #include "ircons.h"
  50 #include "irarch.h"
  51
  52 #undef DEB
  53
  54 #define MAX_BITSTR 64
  55
  56 /* when we need verifying */
  57 #ifdef NDEBUG
  58 # define IRN_VRFY_IRG(res, irg)
  59 #else
  60 # define IRN_VRFY_IRG(res, irg)  irn_vrfy_irg(res, irg)
  61 #endif
  62
  63 /** The params got from the factory in arch_dep_init(...). */
  64 static const ir_settings_arch_dep_t *params = NULL;
  65
  66 /** The bit mask, which optimizations to apply. */
  67 static arch_dep_opts_t opts;
  68
  69 /* we need this new pseudo op */
  70 static ir_op *op_Mulh = NULL;
  71
  72 /**
  73  * construct a Mulh: Mulh(a,b) = (a * b) >> w, w is the with in bits of a, b
  74  */
  75 static ir_node *
  76 new_rd_Mulh (dbg_info *db, ir_graph *irg, ir_node *block,
  77        ir_node *op1, ir_node *op2, ir_mode *mode) {
  78         ir_node *in[2];
  79         ir_node *res;
  80
  81         in[0] = op1;
  82         in[1] = op2;
  83         res = new_ir_node(db, irg, block, op_Mulh, mode, 2, in);
  84         res = optimize_node(res);
  85         IRN_VRFY_IRG(res, irg);
  86         return res;
  87 }
  88
  89 ir_op *get_op_Mulh(void)  { return op_Mulh; }
  90
  91 void arch_dep_init(arch_dep_params_factory_t factory) {
  92         opts = arch_dep_none;
  93
  94         if (factory != NULL)
  95                 params = factory();
  96
  97         if (! op_Mulh) {
  98                 int mulh_opc = get_next_ir_opcode();
  99
 100                 /* create the Mulh operation */
 101                 op_Mulh = new_ir_op(mulh_opc, "Mulh",  op_pin_state_floats, irop_flag_commutative, oparity_binary, 0, 0, NULL);
 102         }
 103 }
 104
 105 void arch_dep_set_opts(arch_dep_opts_t the_opts) {
 106         opts = the_opts;
 107 }
 108
 109 /** check, whether a mode allows a Mulh instruction. */
 110 static int allow_Mulh(ir_mode *mode) {
 111         if (get_mode_size_bits(mode) > params->max_bits_for_mulh)
 112                 return 0;
 113         return (mode_is_signed(mode) && params->allow_mulhs) || (!mode_is_signed(mode) && params->allow_mulhu);
 114 }
 115
 116 /* Replace Muls with Shifts and Add/Subs. */
 117 ir_node *arch_dep_replace_mul_with_shifts(ir_node *irn) {
 118         ir_node *res = irn;
 119         ir_mode *mode = get_irn_mode(irn);
 120
 121         /* If the architecture dependent optimizations were not initialized
 122            or this optimization was not enabled. */
 123         if (params == NULL || (opts & arch_dep_mul_to_shift) == 0)
 124                 return irn;
 125
 126         if (get_irn_op(irn) == op_Mul && mode_is_int(mode)) {
 127                 ir_node *block   = get_nodes_block(irn);
 128                 ir_node *left    = get_binop_left(irn);
 129                 ir_node *right   = get_binop_right(irn);
 130                 tarval *tv       = NULL;
 131                 ir_node *operand = NULL;
 132
 133                 /* Look, if one operand is a constant. */
 134                 if (get_irn_opcode(left) == iro_Const) {
 135                         tv = get_Const_tarval(left);
 136                         operand = right;
 137                 } else if(get_irn_opcode(right) == iro_Const) {
 138                         tv = get_Const_tarval(right);
 139                         operand = left;
 140                 }
 141
 142                 if (tv != NULL) {
 143                         int maximum_shifts = params->maximum_shifts;
 144                         int also_use_subs = params->also_use_subs;
 145                         int highest_shift_amount = params->highest_shift_amount;
 146
 147                         char *bitstr = get_tarval_bitpattern(tv);
 148                         char *p;
 149                         int i, last = 0;
 150                         int counter = 0;
 151                         int curr_bit;
 152                         int compr_len = 0;
 153                         char compr[MAX_BITSTR];
 154
 155                         int singleton;
 156                         int end_of_group;
 157                         int shift_with_sub[MAX_BITSTR] = { 0 };
 158                         int shift_without_sub[MAX_BITSTR] = { 0 };
 159                         int shift_with_sub_pos = 0;
 160                         int shift_without_sub_pos = 0;
 161
 162 #if DEB
 163                         {
 164                                 long val = get_tarval_long(tv);
 165                                 fprintf(stderr, "Found mul with %ld(%lx) = ", val, val);
 166                                 for(p = bitstr; *p != '\0'; p++)
 167                                         printf("%c", *p);
 168                                 printf("\n");
 169                         }
 170 #endif
 171
 172                         for(p = bitstr; *p != '\0'; p++) {
 173                                 int bit = *p != '0';
 174
 175                                 if (bit != last) {
 176                                         /* The last was 1 we are now at 0 OR
 177                                          * The last was 0 and we are now at 1 */
 178                                         compr[compr_len++] = counter;
 179                                         counter = 1;
 180                                 } else
 181                                         counter++;
 182
 183                                 last = bit;
 184                         }
 185                         compr[compr_len++] = counter;
 186
 187 #ifdef DEB
 188                         {
 189                                 const char *prefix = "";
 190                                 for(i = 0; i < compr_len; i++, prefix = ",")
 191                                         fprintf(stderr, "%s%d", prefix, compr[i]);
 192                                 fprintf("\n");
 193                         }
 194 #endif
 195
 196                         /* Go over all recorded one groups. */
 197                         curr_bit = compr[0];
 198
 199                         for(i = 1; i < compr_len; i = end_of_group + 2) {
 200                                 int j, zeros_in_group, ones_in_group;
 201
 202                                 ones_in_group = compr[i];
 203                                 zeros_in_group = 0;
 204
 205                                 /* Scan for singular 0s in a sequence. */
 206                                 for(j = i + 1; j < compr_len && compr[j] == 1; j += 2) {
 207                                         zeros_in_group += 1;
 208                                         ones_in_group += (j + 1 < compr_len ? compr[j + 1] : 0);
 209                                 }
 210                                 end_of_group = j - 1;
 211
 212                                 if(zeros_in_group >= ones_in_group - 1)
 213                                         end_of_group = i;
 214
 215 #ifdef DEB
 216                                 fprintf(stderr, "  i:%d, eg:%d\n", i, end_of_group);
 217 #endif
 218
 219                                 singleton = compr[i] == 1 && i == end_of_group;
 220                                 for(j = i; j <= end_of_group; j += 2) {
 221                                         int curr_ones = compr[j];
 222                                         int biased_curr_bit = curr_bit + 1;
 223                                         int k;
 224
 225 #ifdef DEB
 226                                         fprintf(stderr, "    j:%d, ones:%d\n", j, curr_ones);
 227 #endif
 228
 229                                         /* If this ones group is a singleton group (it has no
 230                                            singleton zeros inside. */
 231                                         if(singleton)
 232                                                 shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
 233                                         else if(j == i)
 234                                                 shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
 235
 236                                         for(k = 0; k < curr_ones; k++)
 237                                                 shift_without_sub[shift_without_sub_pos++] = biased_curr_bit + k;
 238
 239                                         curr_bit += curr_ones;
 240                                         biased_curr_bit = curr_bit + 1;
 241
 242                                         if(!singleton && j == end_of_group)
 243                                                 shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
 244                                         else if(j != end_of_group)
 245                                                 shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
 246
 247                                         curr_bit += compr[j + 1];
 248                                 }
 249                         }
 250
 251                         {
 252                                 int *shifts = shift_with_sub;
 253                                 int n = shift_with_sub_pos;
 254                                 int highest_shift_wide = 0;
 255                                 int highest_shift_seq = 0;
 256                                 int last_shift = 0;
 257
 258                                 /* If we may not use subs, or we can achive the same with adds,
 259                                    prefer adds. */
 260                                 if(!also_use_subs || shift_with_sub_pos >= shift_without_sub_pos) {
 261                                         shifts = shift_without_sub;
 262                                         n = shift_without_sub_pos;
 263                                 }
 264
 265                                 /* If the number of needed shifts exceeds the given maximum,
 266                                    use the Mul and exit. */
 267                                 if(n > maximum_shifts) {
 268 #ifdef DEB
 269                                         fprintf(stderr, "Only allowed %d shifts, but %d are needed\n",
 270                                                 maximum_shifts, n);
 271 #endif
 272                                         return irn;
 273                                 }
 274
 275                                 /* Compute the highest shift needed for both, the
 276                                    sequential and wide representations. */
 277                                 for(i = 0; i < n; i++) {
 278                                         int curr = abs(shifts[i]);
 279                                         int curr_seq = curr - last;
 280
 281                                         highest_shift_wide = curr > highest_shift_wide ? curr : highest_shift_wide;
 282                                         highest_shift_seq = curr_seq > highest_shift_seq ? curr_seq : highest_shift_seq;
 283
 284                                         last_shift = curr;
 285                                 }
 286
 287                                 /* If the highest shift amount is greater than the given limit,
 288                                    give back the Mul */
 289                                 if(highest_shift_seq > highest_shift_amount) {
 290 #ifdef DEB
 291                                         fprintf(stderr, "Shift argument %d exceeds maximum %d\n",
 292                                                 highest_shift_seq, highest_shift_amount);
 293 #endif
 294                                         return irn;
 295                                 }
 296
 297                                 /* If we have subs, we cannot do sequential. */
 298                                 if(1 /* also_use_subs */) {
 299                                         if(n > 0) {
 300                                                 ir_node *curr = NULL;
 301
 302                                                 i = n - 1;
 303
 304                                                 do {
 305                                                         int curr_shift = shifts[i];
 306                                                         int sub = curr_shift < 0;
 307                                                         int amount = abs(curr_shift) - 1;
 308                                                         ir_node *aux = operand;
 309
 310                                                         assert(amount >= 0 && "What is a negative shift??");
 311
 312                                                         if (amount != 0) {
 313                                                                 ir_node *cnst = new_r_Const_long(current_ir_graph, block, mode_Iu, amount);
 314                                                                 aux = new_r_Shl(current_ir_graph, block, operand, cnst, mode);
 315                                                         }
 316
 317                                                         if (curr) {
 318                                                                 if (sub)
 319                                                                         curr = new_r_Sub(current_ir_graph, block, curr, aux, mode);
 320                                                                 else
 321                                                                         curr = new_r_Add(current_ir_graph, block, curr, aux, mode);
 322                                                         } else
 323                                                                 curr = aux;
 324
 325                                                 } while(--i >= 0);
 326
 327                                                 res = curr;
 328                                         }
 329                                 }
 330
 331 #ifdef DEB
 332                                 {
 333                                         const char *prefix = "";
 334                                         for (i = 0; i < n; ++i) {
 335                                                 fprintf(stderr, "%s%d", prefix, shifts[i]);
 336                                                 prefix = ", ";
 337                                         }
 338                                         fprintf(stderr, "\n");
 339                                 }
 340 #endif
 341
 342                         }
 343
 344                         if(bitstr)
 345                                 free(bitstr);
 346                 }
 347
 348         }
 349
 350         if (res != irn)
 351                 hook_arch_dep_replace_mul_with_shifts(irn);
 352
 353         return res;
 354 }
 355
 356 /**
 357  * calculated the ld2 of a tarval if tarval is 2^n, else returns -1.
 358  */
 359 static int tv_ld2(tarval *tv, int bits) {
 360         int i, k = 0, num;
 361
 362         for (num = i = 0; i < bits; ++i) {
 363                 unsigned char v = get_tarval_sub_bits(tv, i);
 364
 365                 if (v) {
 366                         int j;
 367
 368                         for (j = 0; j < 8; ++j)
 369                                 if ((1 << j) & v) {
 370                                         ++num;
 371                                         k = 8 * i + j;
 372                                 }
 373                 }
 374         }
 375         if (num == 1)
 376                 return k;
 377         return -1;
 378 }
 379
 380
 381 /* for shorter lines */
 382 #define ABS(a)    tarval_abs(a)
 383 #define NEG(a)    tarval_neg(a)
 384 #define NOT(a)    tarval_not(a)
 385 #define SHL(a, b) tarval_shl(a, b)
 386 #define SHR(a, b) tarval_shr(a, b)
 387 #define ADD(a, b) tarval_add(a, b)
 388 #define SUB(a, b) tarval_sub(a, b)
 389 #define MUL(a, b) tarval_mul(a, b)
 390 #define DIV(a, b) tarval_div(a, b)
 391 #define MOD(a, b) tarval_mod(a, b)
 392 #define CMP(a, b) tarval_cmp(a, b)
 393 #define CNV(a, m) tarval_convert_to(a, m)
 394 #define ONE(m)    get_mode_one(m)
 395 #define ZERO(m)   get_mode_null(m)
 396
 397 /** The result of a the magic() function. */
 398 struct ms {
 399         tarval *M;        /**< magic number */
 400         int s;            /**< shift amount */
 401         int need_add;     /**< an additional add is needed */
 402         int need_sub;     /**< an additional sub is needed */
 403 };
 404
 405 /**
 406  * Signed division by constant d: calculate the Magic multiplier M and the shift amount s
 407  *
 408  * see Hacker's Delight: 10-6 Integer Division by Constants: Incorporation into a Compiler
 409  */
 410 static struct ms magic(tarval *d) {
 411         ir_mode *mode   = get_tarval_mode(d);
 412         ir_mode *u_mode = find_unsigned_mode(mode);
 413         int bits        = get_mode_size_bits(u_mode);
 414         int p;
 415         tarval *ad, *anc, *delta, *q1, *r1, *q2, *r2, *t;     /* unsigned */
 416         pn_Cmp d_cmp, M_cmp;
 417
 418         tarval *bits_minus_1, *two_bits_1;
 419
 420         struct ms mag;
 421
 422         tarval_int_overflow_mode_t rem = tarval_get_integer_overflow_mode();
 423
 424         /* we need overflow mode to work correctly */
 425         tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
 426
 427         /* 2^(bits-1) */
 428         bits_minus_1 = new_tarval_from_long(bits - 1, u_mode);
 429         two_bits_1   = SHL(get_mode_one(u_mode), bits_minus_1);
 430
 431         ad  = CNV(ABS(d), u_mode);
 432         t   = ADD(two_bits_1, SHR(CNV(d, u_mode), bits_minus_1));
 433         anc = SUB(SUB(t, ONE(u_mode)), MOD(t, ad));   /* Absolute value of nc */
 434         p   = bits - 1;                               /* Init: p */
 435         q1  = DIV(two_bits_1, anc);                   /* Init: q1 = 2^p/|nc| */
 436         r1  = SUB(two_bits_1, MUL(q1, anc));          /* Init: r1 = rem(2^p, |nc|) */
 437         q2  = DIV(two_bits_1, ad);                    /* Init: q2 = 2^p/|d| */
 438         r2  = SUB(two_bits_1, MUL(q2, ad));           /* Init: r2 = rem(2^p, |d|) */
 439
 440         do {
 441                 ++p;
 442                 q1 = ADD(q1, q1);                           /* Update q1 = 2^p/|nc| */
 443                 r1 = ADD(r1, r1);                           /* Update r1 = rem(2^p, |nc|) */
 444
 445                 if (CMP(r1, anc) & pn_Cmp_Ge) {
 446                         q1 = ADD(q1, ONE(u_mode));
 447                         r1 = SUB(r1, anc);
 448                 }
 449
 450                 q2 = ADD(q2, q2);                           /* Update q2 = 2^p/|d| */
 451                 r2 = ADD(r2, r2);                           /* Update r2 = rem(2^p, |d|) */
 452
 453                 if (CMP(r2, ad) & pn_Cmp_Ge) {
 454                         q2 = ADD(q2, ONE(u_mode));
 455                         r2 = SUB(r2, ad);
 456                 }
 457
 458                 delta = SUB(ad, r2);
 459         } while (CMP(q1, delta) & pn_Cmp_Lt || (CMP(q1, delta) & pn_Cmp_Eq && CMP(r1, ZERO(u_mode)) & pn_Cmp_Eq));
 460
 461         d_cmp = CMP(d, ZERO(mode));
 462
 463         if (d_cmp & pn_Cmp_Ge)
 464                 mag.M = ADD(CNV(q2, mode), ONE(mode));
 465         else
 466                 mag.M = SUB(ZERO(mode), ADD(CNV(q2, mode), ONE(mode)));
 467
 468         M_cmp = CMP(mag.M, ZERO(mode));
 469
 470         mag.s = p - bits;
 471
 472         /* need an add if d > 0 && M < 0 */
 473         mag.need_add = d_cmp & pn_Cmp_Gt && M_cmp & pn_Cmp_Lt;
 474
 475         /* need a sub if d < 0 && M > 0 */
 476         mag.need_sub = d_cmp & pn_Cmp_Lt && M_cmp & pn_Cmp_Gt;
 477
 478         tarval_set_integer_overflow_mode(rem);
 479
 480         return mag;
 481 }
 482
 483 /** The result of the magicu() function. */
 484 struct mu {
 485         tarval *M;        /**< magic add constant */
 486         int s;            /**< shift amount */
 487         int need_add;     /**< add indicator */
 488 };
 489
 490 /**
 491  * Unsigned division by constant d: calculate the Magic multiplier M and the shift amount s
 492  *
 493  * see Hacker's Delight: 10-10 Integer Division by Constants: Incorporation into a Compiler (Unsigned)
 494  */
 495 static struct mu magicu(tarval *d) {
 496         ir_mode *mode   = get_tarval_mode(d);
 497         int bits        = get_mode_size_bits(mode);
 498         int p;
 499         tarval *nc, *delta, *q1, *r1, *q2, *r2;
 500         tarval *bits_minus_1, *two_bits_1, *seven_ff;
 501
 502         struct mu magu;
 503
 504         tarval_int_overflow_mode_t rem = tarval_get_integer_overflow_mode();
 505
 506         /* we need overflow mode to work correctly */
 507         tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
 508
 509         bits_minus_1 = new_tarval_from_long(bits - 1, mode);
 510         two_bits_1   = SHL(get_mode_one(mode), bits_minus_1);
 511         seven_ff     = SUB(two_bits_1, ONE(mode));
 512
 513         magu.need_add = 0;                            /* initialize the add indicator */
 514         nc = SUB(NEG(ONE(mode)), MOD(NEG(d), d));
 515         p  = bits - 1;                                /* Init: p */
 516         q1 = DIV(two_bits_1, nc);                     /* Init: q1 = 2^p/nc */
 517         r1 = SUB(two_bits_1, MUL(q1, nc));            /* Init: r1 = rem(2^p, nc) */
 518         q2 = DIV(seven_ff, d);                        /* Init: q2 = (2^p - 1)/d */
 519         r2 = SUB(seven_ff, MUL(q2, d));               /* Init: r2 = rem(2^p - 1, d) */
 520
 521         do {
 522                 ++p;
 523                 if (CMP(r1, SUB(nc, r1)) & pn_Cmp_Ge) {
 524                         q1 = ADD(ADD(q1, q1), ONE(mode));
 525                         r1 = SUB(ADD(r1, r1), nc);
 526                 }
 527                 else {
 528                         q1 = ADD(q1, q1);
 529                         r1 = ADD(r1, r1);
 530                 }
 531
 532                 if (CMP(ADD(r2, ONE(mode)), SUB(d, r2)) & pn_Cmp_Ge) {
 533                         if (CMP(q2, seven_ff) & pn_Cmp_Ge)
 534                                 magu.need_add = 1;
 535
 536                         q2 = ADD(ADD(q2, q2), ONE(mode));
 537                         r2 = SUB(ADD(ADD(r2, r2), ONE(mode)), d);
 538                 }
 539                 else {
 540                         if (CMP(q2, two_bits_1) & pn_Cmp_Ge)
 541                                 magu.need_add = 1;
 542
 543                         q2 = ADD(q2, q2);
 544                         r2 = ADD(ADD(r2, r2), ONE(mode));
 545                 }
 546                 delta = SUB(SUB(d, ONE(mode)), r2);
 547         } while (p < 2*bits &&
 548                 (CMP(q1, delta) & pn_Cmp_Lt || (CMP(q1, delta) & pn_Cmp_Eq && CMP(r1, ZERO(mode)) & pn_Cmp_Eq)));
 549
 550         magu.M = ADD(q2, ONE(mode));                       /* Magic number */
 551         magu.s = p - bits;                                 /* and shift amount */
 552
 553         tarval_set_integer_overflow_mode(rem);
 554
 555         return magu;
 556 }
 557
 558 /**
 559  * Build the Mulh replacement code for n / tv.
 560  *
 561  * Note that 'div' might be a mod or DivMod operation as well
 562  */
 563 static ir_node *replace_div_by_mulh(ir_node *div, tarval *tv) {
 564         dbg_info *dbg  = get_irn_dbg_info(div);
 565         ir_node *n     = get_binop_left(div);
 566         ir_node *block = get_nodes_block(div);
 567         ir_mode *mode  = get_irn_mode(n);
 568         int bits       = get_mode_size_bits(mode);
 569         ir_node *q, *t, *c;
 570
 571         /* Beware: do not transform bad code */
 572         if (is_Bad(n) || is_Bad(block))
 573                 return div;
 574
 575         if (mode_is_signed(mode)) {
 576                 struct ms mag = magic(tv);
 577
 578                 /* generate the Mulh instruction */
 579                 c = new_r_Const(current_ir_graph, block, mode, mag.M);
 580                 q = new_rd_Mulh(dbg, current_ir_graph, block, n, c, mode);
 581
 582                 /* do we need an Add or Sub */
 583                 if (mag.need_add)
 584                         q = new_rd_Add(dbg, current_ir_graph, block, q, n, mode);
 585                 else if (mag.need_sub)
 586                         q = new_rd_Sub(dbg, current_ir_graph, block, q, n, mode);
 587
 588                 /* Do we need the shift */
 589                 if (mag.s > 0) {
 590                         c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s);
 591                         q    = new_rd_Shrs(dbg, current_ir_graph, block, q, c, mode);
 592                 }
 593
 594                 /* final */
 595                 c = new_r_Const_long(current_ir_graph, block, mode_Iu, bits-1);
 596                 t = new_rd_Shr(dbg, current_ir_graph, block, q, c, mode);
 597
 598                 q = new_rd_Add(dbg, current_ir_graph, block, q, t, mode);
 599         } else {
 600                 struct mu mag = magicu(tv);
 601                 ir_node *c;
 602
 603                 /* generate the Mulh instruction */
 604                 c = new_r_Const(current_ir_graph, block, mode, mag.M);
 605                 q    = new_rd_Mulh(dbg, current_ir_graph, block, n, c, mode);
 606
 607                 if (mag.need_add) {
 608                         if (mag.s > 0) {
 609                                 /* use the GM scheme */
 610                                 t = new_rd_Sub(dbg, current_ir_graph, block, n, q, mode);
 611
 612                                 c = new_r_Const(current_ir_graph, block, mode_Iu, get_mode_one(mode_Iu));
 613                                 t = new_rd_Shr(dbg, current_ir_graph, block, t, c, mode);
 614
 615                                 t = new_rd_Add(dbg, current_ir_graph, block, t, q, mode);
 616
 617                                 c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s-1);
 618                                 q = new_rd_Shr(dbg, current_ir_graph, block, t, c, mode);
 619                         } else {
 620                                 /* use the default scheme */
 621                                 q = new_rd_Add(dbg, current_ir_graph, block, q, n, mode);
 622                         }
 623                 } else if (mag.s > 0) { /* default scheme, shift needed */
 624                         c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s);
 625                         q = new_rd_Shr(dbg, current_ir_graph, block, q, c, mode);
 626                 }
 627         }
 628         return q;
 629 }
 630
 631 /* Replace Divs with Shifts and Add/Subs and Mulh. */
 632 ir_node *arch_dep_replace_div_by_const(ir_node *irn) {
 633         ir_node *res  = irn;
 634
 635         /* If the architecture dependent optimizations were not initialized
 636         or this optimization was not enabled. */
 637         if (params == NULL || (opts & arch_dep_div_by_const) == 0)
 638                 return irn;
 639
 640         if (get_irn_opcode(irn) == iro_Div) {
 641                 ir_node *c = get_Div_right(irn);
 642                 ir_node *block, *left;
 643                 ir_mode *mode;
 644                 tarval *tv, *ntv;
 645                 dbg_info *dbg;
 646                 int n, bits;
 647                 int k, n_flag;
 648
 649                 if (get_irn_op(c) != op_Const)
 650                         return irn;
 651
 652                 tv = get_Const_tarval(c);
 653
 654                 /* check for division by zero */
 655                 if (classify_tarval(tv) == TV_CLASSIFY_NULL)
 656                         return irn;
 657
 658                 left  = get_Div_left(irn);
 659                 mode  = get_irn_mode(left);
 660                 block = get_nodes_block(irn);
 661                 dbg   = get_irn_dbg_info(irn);
 662
 663                 bits = get_mode_size_bits(mode);
 664                 n    = (bits + 7) / 8;
 665
 666                 k = -1;
 667                 if (mode_is_signed(mode)) {
 668                         /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 669                         ntv = tarval_neg(tv);
 670                         n_flag = 1;
 671                         k = tv_ld2(ntv, n);
 672                 }
 673
 674                 if (k < 0) {
 675                         n_flag = 0;
 676                         k = tv_ld2(tv, n);
 677                 }
 678
 679                 if (k >= 0) { /* division by 2^k or -2^k */
 680                         if (mode_is_signed(mode)) {
 681                                 ir_node *k_node;
 682                                 ir_node *curr = left;
 683
 684                                 if (k != 1) {
 685                                         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
 686                                         curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 687                                 }
 688
 689                                 k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
 690                                 curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 691
 692                                 curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 693
 694                                 k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 695                                 res    = new_rd_Shrs(dbg, current_ir_graph, block, curr, k_node, mode);
 696
 697                                 if (n_flag) { /* negate the result */
 698                                         ir_node *k_node;
 699
 700                                         k_node = new_r_Const(current_ir_graph, block, mode, get_mode_null(mode));
 701                                         res = new_rd_Sub(dbg, current_ir_graph, block, k_node, res, mode);
 702                                 }
 703                         } else {      /* unsigned case */
 704                                 ir_node *k_node;
 705
 706                                 k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 707                                 res    = new_rd_Shr(dbg, current_ir_graph, block, left, k_node, mode);
 708                         }
 709                 } else {
 710                         /* other constant */
 711                         if (allow_Mulh(mode))
 712                                 res = replace_div_by_mulh(irn, tv);
 713                 }
 714         }
 715
 716         if (res != irn)
 717                 hook_arch_dep_replace_division_by_const(irn);
 718
 719         return res;
 720 }
 721
 722 /* Replace Mods with Shifts and Add/Subs and Mulh. */
 723 ir_node *arch_dep_replace_mod_by_const(ir_node *irn) {
 724         ir_node *res  = irn;
 725
 726         /* If the architecture dependent optimizations were not initialized
 727            or this optimization was not enabled. */
 728         if (params == NULL || (opts & arch_dep_mod_by_const) == 0)
 729                 return irn;
 730
 731         if (get_irn_opcode(irn) == iro_Mod) {
 732                 ir_node *c = get_Mod_right(irn);
 733                 ir_node *block, *left;
 734                 ir_mode *mode;
 735                 tarval *tv, *ntv;
 736                 dbg_info *dbg;
 737                 int n, bits;
 738                 int k;
 739
 740                 if (get_irn_op(c) != op_Const)
 741                         return irn;
 742
 743                 tv = get_Const_tarval(c);
 744
 745                 /* check for division by zero */
 746                 if (classify_tarval(tv) == TV_CLASSIFY_NULL)
 747                         return irn;
 748
 749                 left  = get_Mod_left(irn);
 750                 mode  = get_irn_mode(left);
 751                 block = get_nodes_block(irn);
 752                 dbg   = get_irn_dbg_info(irn);
 753                 bits = get_mode_size_bits(mode);
 754                 n    = (bits + 7) / 8;
 755
 756                 k = -1;
 757                 if (mode_is_signed(mode)) {
 758                         /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 759                         ntv = tarval_neg(tv);
 760                         k = tv_ld2(ntv, n);
 761                 }
 762
 763                 if (k < 0) {
 764                         k = tv_ld2(tv, n);
 765                 }
 766
 767                 if (k >= 0) {
 768                         /* division by 2^k or -2^k:
 769                          * we use "modulus" here, so x % y == x % -y that's why is no difference between the case 2^k and -2^k
 770                          */
 771                         if (mode_is_signed(mode)) {
 772                                 ir_node *k_node;
 773                                 ir_node *curr = left;
 774
 775                                 if (k != 1) {
 776                                         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
 777                                         curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 778                                 }
 779
 780                                 k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
 781                                 curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 782
 783                                 curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 784
 785                                 k_node = new_r_Const_long(current_ir_graph, block, mode, (-1) << k);
 786                                 curr   = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
 787
 788                                 res    = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
 789                         } else {      /* unsigned case */
 790                                 ir_node *k_node;
 791
 792                                 k_node = new_r_Const_long(current_ir_graph, block, mode, (1 << k) - 1);
 793                                 res    = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
 794                         }
 795                 } else {
 796                         /* other constant */
 797                         if (allow_Mulh(mode)) {
 798                                 res = replace_div_by_mulh(irn, tv);
 799
 800                                 res = new_rd_Mul(dbg, current_ir_graph, block, res, c, mode);
 801
 802                                 /* res = arch_dep_mul_to_shift(res); */
 803
 804                                 res = new_rd_Sub(dbg, current_ir_graph, block, left, res, mode);
 805                         }
 806                 }
 807         }
 808
 809         if (res != irn)
 810                 hook_arch_dep_replace_division_by_const(irn);
 811
 812         return res;
 813 }
 814
 815 /* Replace DivMods with Shifts and Add/Subs and Mulh. */
 816 void arch_dep_replace_divmod_by_const(ir_node **div, ir_node **mod, ir_node *irn) {
 817         *div = *mod = NULL;
 818
 819         /* If the architecture dependent optimizations were not initialized
 820            or this optimization was not enabled. */
 821         if (params == NULL ||
 822                 ((opts & (arch_dep_div_by_const|arch_dep_mod_by_const)) != (arch_dep_div_by_const|arch_dep_mod_by_const)))
 823                 return;
 824
 825         if (get_irn_opcode(irn) == iro_DivMod) {
 826                 ir_node *c = get_DivMod_right(irn);
 827                 ir_node *block, *left;
 828                 ir_mode *mode;
 829                 tarval *tv, *ntv;
 830                 dbg_info *dbg;
 831                 int n, bits;
 832                 int k, n_flag;
 833
 834                 if (get_irn_op(c) != op_Const)
 835                         return;
 836
 837                 tv = get_Const_tarval(c);
 838
 839                 /* check for division by zero */
 840                 if (classify_tarval(tv) == TV_CLASSIFY_NULL)
 841                         return;
 842
 843                 left  = get_DivMod_left(irn);
 844                 mode  = get_irn_mode(left);
 845                 block = get_nodes_block(irn);
 846                 dbg   = get_irn_dbg_info(irn);
 847
 848                 bits = get_mode_size_bits(mode);
 849                 n    = (bits + 7) / 8;
 850
 851                 k = -1;
 852                 if (mode_is_signed(mode)) {
 853                         /* for signed divisions, the algorithm works for a / -2^k by negating the result */
 854                         ntv = tarval_neg(tv);
 855                         n_flag = 1;
 856                         k = tv_ld2(ntv, n);
 857                 }
 858
 859                 if (k < 0) {
 860                         n_flag = 0;
 861                         k = tv_ld2(tv, n);
 862                 }
 863
 864                 if (k >= 0) { /* division by 2^k or -2^k */
 865                         if (mode_is_signed(mode)) {
 866                                 ir_node *k_node, *c_k;
 867                                 ir_node *curr = left;
 868
 869                                 if (k != 1) {
 870                                         k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
 871                                         curr   = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
 872                                 }
 873
 874                                 k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
 875                                 curr   = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
 876
 877                                 curr   = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
 878
 879                                 c_k    = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 880
 881                                 *div   = new_rd_Shrs(dbg, current_ir_graph, block, curr, c_k, mode);
 882
 883                                 if (n_flag) { /* negate the div result */
 884                                         ir_node *k_node;
 885
 886                                         k_node = new_r_Const(current_ir_graph, block, mode, get_mode_null(mode));
 887                                         *div = new_rd_Sub(dbg, current_ir_graph, block, k_node, *div, mode);
 888                                 }
 889
 890                                 k_node = new_r_Const_long(current_ir_graph, block, mode, (-1) << k);
 891                                 curr   = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
 892
 893                                 *mod   = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
 894                         } else {      /* unsigned case */
 895                                 ir_node *k_node;
 896
 897                                 k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
 898                                 *div   = new_rd_Shr(dbg, current_ir_graph, block, left, k_node, mode);
 899
 900                                 k_node = new_r_Const_long(current_ir_graph, block, mode, (1 << k) - 1);
 901                                 *mod   = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
 902                         }
 903                 } else {
 904                         /* other constant */
 905                         if (allow_Mulh(mode)) {
 906                                 ir_node *t;
 907
 908                                 *div = replace_div_by_mulh(irn, tv);
 909
 910                                 t    = new_rd_Mul(dbg, current_ir_graph, block, *div, c, mode);
 911
 912                                 /* t = arch_dep_mul_to_shift(t); */
 913
 914                                 *mod = new_rd_Sub(dbg, current_ir_graph, block, left, t, mode);
 915                         }
 916                 }
 917         }
 918
 919         if (*div)
 920                 hook_arch_dep_replace_division_by_const(irn);
 921 }
 922
 923
 924 static const ir_settings_arch_dep_t default_params = {
 925         1,  /* also use subs */
 926         4,  /* maximum shifts */
 927         31, /* maximum shift amount */
 928
 929         0,  /* allow Mulhs */
 930         0,  /* allow Mulus */
 931         32  /* Mulh allowed up to 32 bit */
 932 };
 933
 934 /* A default parameter factory for testing purposes. */
 935 const ir_settings_arch_dep_t *arch_dep_default_factory(void) {
 936         return &default_params;
 937 }