2 * Copyright (C) 1995-2007 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Machine dependent Firm optimizations.
24 * @author Sebastian Hack, Michael Beck
38 #include "irgraph_t.h"
45 #include "dbginfo_t.h"
46 #include "iropt_dbg.h"
51 #include "irreflect.h"
57 /* when we need verifying */
/* Debug builds check every freshly constructed node; release builds compile
 * the check away to an empty statement. NOTE(review): the surrounding #ifdef
 * lines are not visible in this extract — the two definitions below are the
 * disabled and enabled variants respectively. */
59 # define IRN_VRFY_IRG(res, irg)
61 # define IRN_VRFY_IRG(res, irg) irn_vrfy_irg(res, irg)
64 /** The params got from the factory in arch_dep_init(...). Remains NULL until
 * arch_dep_init() has run; every transformation below bails out while NULL. */
65 static const arch_dep_params_t *params = NULL;
67 /** The bit mask, which optimizations to apply (arch_dep_opts_t flags,
 * tested with & in each replace function). */
68 static arch_dep_opts_t opts;
70 /* we need this new pseudo op: high word of a widened multiply,
 * created lazily in arch_dep_init() and exposed via get_op_Mulh(). */
71 static ir_op *op_Mulh = NULL;
74 * construct a Mulh: Mulh(a,b) = (a * b) >> w, w is the width in bits of a, b
/* Builds the node, lets the generic optimizer fold it, and verifies it in
 * debug builds. NOTE(review): the declaration of 'res' and the 'in' operand
 * array are in lines not visible in this extract. */
77 new_rd_Mulh (dbg_info *db, ir_graph *irg, ir_node *block,
78 ir_node *op1, ir_node *op2, ir_mode *mode)
85 res = new_ir_node(db, irg, block, op_Mulh, mode, 2, in);
86 res = optimize_node(res);
87 IRN_VRFY_IRG(res, irg);
91 ir_op *get_op_Mulh(void) { return op_Mulh; }
/* Module initialization: allocates a fresh opcode, registers the Mulh pseudo
 * op (pinned floating, commutative, binary), and describes its signature for
 * the reflection interface: one Int result, and Block + two Int operands. */
93 void arch_dep_init(arch_dep_params_factory_t factory)
102 int mulh_opc = get_next_ir_opcode();
104 /* create the Mulh operation */
105 op_Mulh = new_ir_op(mulh_opc, "Mulh", op_pin_state_floats, irop_flag_commutative, oparity_binary, 0, 0, NULL);
106 sig = rflct_signature_allocate(1, 3);
107 rflct_signature_set_arg(sig, 0, 0, "Res", RFLCT_MC(Int), 0, 0);
108 rflct_signature_set_arg(sig, 1, 0, "Block", RFLCT_MC(BB), 0, 0);
109 rflct_signature_set_arg(sig, 1, 1, "Op 0", RFLCT_MC(Int), 0, 0);
110 rflct_signature_set_arg(sig, 1, 2, "Op 1", RFLCT_MC(Int), 0, 0);
112 rflct_new_opcode(mulh_opc, "Mulh", 0);
113 rflct_opcode_add_signature(mulh_opc, sig);
/* Selects which of the arch-dependent optimizations are active.
 * NOTE(review): body truncated in this extract — presumably stores the_opts
 * into the file-static 'opts'; confirm against the full file. */
117 void arch_dep_set_opts(arch_dep_opts_t the_opts) {
121 /** check, whether a mode allows a Mulh instruction. */
/* A mode qualifies only if it fits into max_bits_for_mulh AND the matching
 * signed (allow_mulhs) or unsigned (allow_mulhu) capability flag is set. */
122 static int allow_Mulh(ir_mode *mode)
124 if (get_mode_size_bits(mode) > params->max_bits_for_mulh)
126 return (mode_is_signed(mode) && params->allow_mulhs) || (!mode_is_signed(mode) && params->allow_mulhu);
129 /* Replace Muls with Shifts and Add/Subs. */
/* Strategy: if one Mul operand is a constant, run-length-encode its bit
 * pattern into 'compr' (alternating counts of equal bits), derive two shift
 * plans — one using Sub for runs of ones (Booth-style), one using only Adds —
 * pick the shorter, and emit it unless it exceeds the configured limits
 * (maximum_shifts / highest_shift_amount), in which case the Mul is kept. */
130 ir_node *arch_dep_replace_mul_with_shifts(ir_node *irn)
133 ir_mode *mode = get_irn_mode(irn);
135 /* If the architecture dependent optimizations were not initialized
136 or this optimization was not enabled. */
137 if (params == NULL || (opts & arch_dep_mul_to_shift) == 0)
140 if (get_irn_op(irn) == op_Mul && mode_is_int(mode)) {
141 ir_node *block = get_irn_n(irn, -1);
142 ir_node *left = get_binop_left(irn);
143 ir_node *right = get_binop_right(irn);
145 ir_node *operand = NULL;
147 /* Look, if one operand is a constant. */
148 if (get_irn_opcode(left) == iro_Const) {
149 tv = get_Const_tarval(left);
151 } else if(get_irn_opcode(right) == iro_Const) {
152 tv = get_Const_tarval(right);
157 int maximum_shifts = params->maximum_shifts;
158 int also_use_subs = params->also_use_subs;
159 int highest_shift_amount = params->highest_shift_amount;
161 char *bitstr = get_tarval_bitpattern(tv);
167 char compr[MAX_BITSTR];
171 int shift_with_sub[MAX_BITSTR] = { 0 };
172 int shift_without_sub[MAX_BITSTR] = { 0 };
173 int shift_with_sub_pos = 0;
174 int shift_without_sub_pos = 0;
/* Debug dump of the constant and its bit string (DEB guard not visible here). */
178 long val = get_tarval_long(tv);
179 fprintf(stderr, "Found mul with %ld(%lx) = ", val, val);
180 for(p = bitstr; *p != '\0'; p++)
/* Run-length encode the bit pattern: emit the current run length whenever
 * the bit value flips, plus the final run after the loop. */
186 for(p = bitstr; *p != '\0'; p++) {
190 /* The last was 1 we are now at 0 OR
191 * The last was 0 and we are now at 1 */
192 compr[compr_len++] = counter;
200 compr[compr_len++] = counter;
205 const char *prefix = "";
206 for(i = 0; i < compr_len; i++, prefix = ",")
207 fprintf(stderr, "%s%d", prefix, compr[i]);
212 /* Go over all recorded one groups. */
/* compr[odd] holds runs of ones, compr[even] runs of zeros; step to the
 * next ones group past the group just consumed. */
215 for(i = 1; i < compr_len; i = end_of_group + 2) {
216 int j, zeros_in_group, ones_in_group;
218 ones_in_group = compr[i];
221 /* Scan for singular 0s in a sequence. */
222 for(j = i + 1; j < compr_len && compr[j] == 1; j += 2) {
224 ones_in_group += (j + 1 < compr_len ? compr[j + 1] : 0);
226 end_of_group = j - 1;
/* If the sub-encoding would not save operations, treat as separate groups. */
228 if(zeros_in_group >= ones_in_group - 1)
232 fprintf(stderr, " i:%d, eg:%d\n", i, end_of_group);
235 singleton = compr[i] == 1 && i == end_of_group;
236 for(j = i; j <= end_of_group; j += 2) {
237 int curr_ones = compr[j];
/* Shift amounts are stored biased by +1 so that a negative sign can mark
 * "subtract this term"; abs(x)-1 recovers the true amount when emitting. */
238 int biased_curr_bit = curr_bit + 1;
242 fprintf(stderr, " j:%d, ones:%d\n", j, curr_ones);
245 /* If this ones group is a singleton group (it has no
246 singleton zeros inside. */
248 shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
250 shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
/* Add-only plan: one shifted add per set bit in the run. */
252 for(k = 0; k < curr_ones; k++)
253 shift_without_sub[shift_without_sub_pos++] = biased_curr_bit + k;
255 curr_bit += curr_ones;
256 biased_curr_bit = curr_bit + 1;
258 if(!singleton && j == end_of_group)
259 shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
260 else if(j != end_of_group)
261 shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
263 curr_bit += compr[j + 1];
269 int *shifts = shift_with_sub;
270 int n = shift_with_sub_pos;
271 int highest_shift_wide = 0;
272 int highest_shift_seq = 0;
275 /* If we may not use subs, or we can achieve the same with adds,
277 if(!also_use_subs || shift_with_sub_pos >= shift_without_sub_pos) {
278 shifts = shift_without_sub;
279 n = shift_without_sub_pos;
282 /* If the number of needed shifts exceeds the given maximum,
283 use the Mul and exit. */
284 if(n > maximum_shifts) {
286 fprintf(stderr, "Only allowed %d shifts, but %d are needed\n",
292 /* Compute the highest shift needed for both, the
293 sequential and wide representations. */
294 for(i = 0; i < n; i++) {
295 int curr = abs(shifts[i]);
296 int curr_seq = curr - last;
298 highest_shift_wide = curr > highest_shift_wide ? curr
299 : highest_shift_wide;
300 highest_shift_seq = curr_seq > highest_shift_seq ? curr_seq
306 /* If the highest shift amount is greater than the given limit,
308 if(highest_shift_seq > highest_shift_amount) {
310 fprintf(stderr, "Shift argument %d exceeds maximum %d\n",
311 highest_shift_seq, highest_shift_amount);
316 /* If we have subs, we cannot do sequential. */
/* Emission: build the Shl/Add/Sub chain from the chosen plan. */
317 if(1 /* also_use_subs */) {
319 ir_node *curr = NULL;
324 int curr_shift = shifts[i];
/* Negative entry = biased amount whose term must be subtracted. */
325 int sub = curr_shift < 0;
326 int amount = abs(curr_shift) - 1;
327 ir_node *aux = operand;
329 assert(amount >= 0 && "What is a negative shift??");
332 ir_node *cnst = new_r_Const_long(current_ir_graph, block, mode_Iu, amount);
333 aux = new_r_Shl(current_ir_graph, block, operand, cnst, mode);
338 curr = new_r_Sub(current_ir_graph, block, curr, aux, mode);
340 curr = new_r_Add(current_ir_graph, block, curr, aux, mode);
352 const char *prefix = "";
353 for (i = 0; i < n; ++i) {
354 fprintf(stderr, "%s%d", prefix, shifts[i]);
357 fprintf(stderr, "\n");
/* Statistics hook: report that a Mul was successfully replaced. */
370 hook_arch_dep_replace_mul_with_shifts(irn);
376 * calculated the ld2 of a tarval if tarval is 2^n, else returns -1.
/* Walks the tarval byte-by-byte via get_tarval_sub_bits; 'num' accumulates
 * the bit position while checking that exactly one bit is set.
 * NOTE(review): the counting/early-exit logic is in lines not visible here. */
378 static int tv_ld2(tarval *tv, int bits)
382 for (num = i = 0; i < bits; ++i) {
383 unsigned char v = get_tarval_sub_bits(tv, i);
388 for (j = 0; j < 8; ++j)
401 /* for shorter lines */
/* Thin aliases over the tarval arithmetic API used by magic()/magicu().
 * Arguments are deliberately unparenthesized: each expands directly into a
 * function-call argument list, so no precedence hazard exists. */
402 #define ABS(a) tarval_abs(a)
403 #define NEG(a) tarval_neg(a)
404 #define NOT(a) tarval_not(a)
405 #define SHL(a, b) tarval_shl(a, b)
406 #define SHR(a, b) tarval_shr(a, b)
407 #define ADD(a, b) tarval_add(a, b)
408 #define SUB(a, b) tarval_sub(a, b)
409 #define MUL(a, b) tarval_mul(a, b)
410 #define DIV(a, b) tarval_div(a, b)
411 #define MOD(a, b) tarval_mod(a, b)
412 #define CMP(a, b) tarval_cmp(a, b)
413 #define CNV(a, m) tarval_convert_to(a, m)
414 #define ONE(m) get_mode_one(m)
415 #define ZERO(m) get_mode_null(m)
417 /** The result of a the magic() function: parameters of the signed
 * divide-by-constant strengthening (multiplier, shift, fixups). */
419 tarval *M; /**< magic number */
420 int s; /**< shift amount */
421 int need_add; /**< an additional add is needed */
422 int need_sub; /**< an additional sub is needed */
426 * Signed division by constant d: calculate the Magic multiplier M and the shift amount s
428 * see Hacker's Delight: 10-6 Integer Division by Constants: Incorporation into a Compiler
/* Faithful transcription of Warren's 'magic()' onto tarval arithmetic:
 * iterate p upward until 2^p exceeds delta * |nc|, then derive M and the
 * add/sub fixups from the signs of d and M. All intermediate math runs in
 * the unsigned companion mode with wrap-around overflow. */
430 static struct ms magic(tarval *d)
432 ir_mode *mode = get_tarval_mode(d);
433 ir_mode *u_mode = find_unsigned_mode(mode);
434 int bits = get_mode_size_bits(u_mode);
436 tarval *ad, *anc, *delta, *q1, *r1, *q2, *r2, *t; /* unsigned */
439 tarval *bits_minus_1, *two_bits_1;
/* Save/restore the global overflow mode around the computation. */
443 tarval_int_overflow_mode_t rem = tarval_get_integer_overflow_mode();
445 /* we need overflow mode to work correctly */
446 tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
449 bits_minus_1 = new_tarval_from_long(bits - 1, u_mode);
450 two_bits_1 = SHL(get_mode_one(u_mode), bits_minus_1); /* 2^(bits-1) */
452 ad = CNV(ABS(d), u_mode);
453 t = ADD(two_bits_1, SHR(CNV(d, u_mode), bits_minus_1));
454 anc = SUB(SUB(t, ONE(u_mode)), MOD(t, ad)); /* Absolute value of nc */
455 p = bits - 1; /* Init: p */
456 q1 = DIV(two_bits_1, anc); /* Init: q1 = 2^p/|nc| */
457 r1 = SUB(two_bits_1, MUL(q1, anc)); /* Init: r1 = rem(2^p, |nc|) */
458 q2 = DIV(two_bits_1, ad); /* Init: q2 = 2^p/|d| */
459 r2 = SUB(two_bits_1, MUL(q2, ad)); /* Init: r2 = rem(2^p, |d|) */
463 q1 = ADD(q1, q1); /* Update q1 = 2^p/|nc| */
464 r1 = ADD(r1, r1); /* Update r1 = rem(2^p, |nc|) */
466 if (CMP(r1, anc) & pn_Cmp_Ge) {
467 q1 = ADD(q1, ONE(u_mode));
471 q2 = ADD(q2, q2); /* Update q2 = 2^p/|d| */
472 r2 = ADD(r2, r2); /* Update r2 = rem(2^p, |d|) */
474 if (CMP(r2, ad) & pn_Cmp_Ge) {
475 q2 = ADD(q2, ONE(u_mode));
/* Loop until q1 > delta, or q1 == delta with r1 != 0 (textbook condition). */
480 } while (CMP(q1, delta) & pn_Cmp_Lt || (CMP(q1, delta) & pn_Cmp_Eq && CMP(r1, ZERO(u_mode)) & pn_Cmp_Eq));
482 d_cmp = CMP(d, ZERO(mode));
484 if (d_cmp & pn_Cmp_Ge)
485 mag.M = ADD(CNV(q2, mode), ONE(mode));
487 mag.M = SUB(ZERO(mode), ADD(CNV(q2, mode), ONE(mode)));
489 M_cmp = CMP(mag.M, ZERO(mode));
493 /* need an add if d > 0 && M < 0 */
494 mag.need_add = d_cmp & pn_Cmp_Gt && M_cmp & pn_Cmp_Lt;
496 /* need a sub if d < 0 && M > 0 */
497 mag.need_sub = d_cmp & pn_Cmp_Lt && M_cmp & pn_Cmp_Gt;
499 tarval_set_integer_overflow_mode(rem);
504 /** The result of the magicu() function: parameters of the unsigned
 * divide-by-constant strengthening. */
506 tarval *M; /**< magic add constant */
507 int s; /**< shift amount */
508 int need_add; /**< add indicator */
512 * Unsigned division by constant d: calculate the Magic multiplier M and the shift amount s
514 * see Hacker's Delight: 10-10 Integer Division by Constants: Incorporation into a Compiler (Unsigned)
/* Warren's 'magicu()' on tarval arithmetic; sets magu.need_add when the
 * quotient estimate overflows so the caller must emit the add-and-shift
 * (GM) correction scheme. p may grow up to 2*bits. */
516 static struct mu magicu(tarval *d)
518 ir_mode *mode = get_tarval_mode(d);
519 int bits = get_mode_size_bits(mode);
521 tarval *nc, *delta, *q1, *r1, *q2, *r2;
522 tarval *bits_minus_1, *two_bits_1, *seven_ff;
/* Save/restore the global overflow mode around the computation. */
526 tarval_int_overflow_mode_t rem = tarval_get_integer_overflow_mode();
528 /* we need overflow mode to work correctly */
529 tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
531 bits_minus_1 = new_tarval_from_long(bits - 1, mode);
532 two_bits_1 = SHL(get_mode_one(mode), bits_minus_1); /* 2^(bits-1) */
533 seven_ff = SUB(two_bits_1, ONE(mode)); /* 2^(bits-1) - 1 */
535 magu.need_add = 0; /* initialize the add indicator */
536 nc = SUB(NEG(ONE(mode)), MOD(NEG(d), d));
537 p = bits - 1; /* Init: p */
538 q1 = DIV(two_bits_1, nc); /* Init: q1 = 2^p/nc */
539 r1 = SUB(two_bits_1, MUL(q1, nc)); /* Init: r1 = rem(2^p, nc) */
540 q2 = DIV(seven_ff, d); /* Init: q2 = (2^p - 1)/d */
541 r2 = SUB(seven_ff, MUL(q2, d)); /* Init: r2 = rem(2^p - 1, d) */
/* Doubling step for q1/r1, carried out without overflow by comparing r1
 * against nc - r1 instead of 2*r1 against nc. */
545 if (CMP(r1, SUB(nc, r1)) & pn_Cmp_Ge) {
546 q1 = ADD(ADD(q1, q1), ONE(mode));
547 r1 = SUB(ADD(r1, r1), nc);
554 if (CMP(ADD(r2, ONE(mode)), SUB(d, r2)) & pn_Cmp_Ge) {
555 if (CMP(q2, seven_ff) & pn_Cmp_Ge)
558 q2 = ADD(ADD(q2, q2), ONE(mode));
559 r2 = SUB(ADD(ADD(r2, r2), ONE(mode)), d);
562 if (CMP(q2, two_bits_1) & pn_Cmp_Ge)
566 r2 = ADD(ADD(r2, r2), ONE(mode));
568 delta = SUB(SUB(d, ONE(mode)), r2);
569 } while (p < 2*bits &&
570 (CMP(q1, delta) & pn_Cmp_Lt || (CMP(q1, delta) & pn_Cmp_Eq && CMP(r1, ZERO(mode)) & pn_Cmp_Eq)));
572 magu.M = ADD(q2, ONE(mode)); /* Magic number */
573 magu.s = p - bits; /* and shift amount */
575 tarval_set_integer_overflow_mode(rem);
581 * Build the Mulh replacement code for n / tv.
583 * Note that 'div' might be a mod or DivMod operation as well
/* Signed path: Mulh + optional Add/Sub fixup + arithmetic shift + sign
 * correction (add the shifted-out sign bit). Unsigned path: Mulh plus either
 * the default shift scheme or the GM scheme when magicu reported need_add. */
585 static ir_node *replace_div_by_mulh(ir_node *div, tarval *tv)
587 dbg_info *dbg = get_irn_dbg_info(div);
588 ir_node *n = get_binop_left(div);
589 ir_node *block = get_irn_n(div, -1);
590 ir_mode *mode = get_irn_mode(n);
591 int bits = get_mode_size_bits(mode);
594 /* Beware: do not transform bad code */
595 if (is_Bad(n) || is_Bad(block))
598 if (mode_is_signed(mode)) {
599 struct ms mag = magic(tv);
601 /* generate the Mulh instruction */
602 c = new_r_Const(current_ir_graph, block, mode, mag.M);
603 q = new_rd_Mulh(dbg, current_ir_graph, block, n, c, mode);
605 /* do we need an Add or Sub */
607 q = new_rd_Add(dbg, current_ir_graph, block, q, n, mode);
608 else if (mag.need_sub)
609 q = new_rd_Sub(dbg, current_ir_graph, block, q, n, mode);
611 /* Do we need the shift */
613 c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s);
614 q = new_rd_Shrs(dbg, current_ir_graph, block, q, c, mode);
/* Round toward zero: add the sign bit (q >> (bits-1)) to the quotient. */
618 c = new_r_Const_long(current_ir_graph, block, mode_Iu, bits-1);
619 t = new_rd_Shr(dbg, current_ir_graph, block, q, c, mode);
621 q = new_rd_Add(dbg, current_ir_graph, block, q, t, mode);
624 struct mu mag = magicu(tv);
627 /* generate the Mulh instruction */
628 c = new_r_Const(current_ir_graph, block, mode, mag.M);
629 q = new_rd_Mulh(dbg, current_ir_graph, block, n, c, mode);
633 /* use the GM scheme: q = (((n - q) >> 1) + q) >> (s-1) */
634 t = new_rd_Sub(dbg, current_ir_graph, block, n, q, mode);
636 c = new_r_Const(current_ir_graph, block, mode_Iu, get_mode_one(mode_Iu));
637 t = new_rd_Shr(dbg, current_ir_graph, block, t, c, mode);
639 t = new_rd_Add(dbg, current_ir_graph, block, t, q, mode);
641 c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s-1);
642 q = new_rd_Shr(dbg, current_ir_graph, block, t, c, mode);
645 /* use the default scheme */
646 q = new_rd_Add(dbg, current_ir_graph, block, q, n, mode);
649 else if (mag.s > 0) { /* default scheme, shift needed */
650 c = new_r_Const_long(current_ir_graph, block, mode_Iu, mag.s);
651 q = new_rd_Shr(dbg, current_ir_graph, block, q, c, mode);
657 /* Replace Divs with Shifts and Add/Subs and Mulh. */
/* Fast path: divisor is +/-2^k -> shift sequence (with sign fixup and
 * optional negation for signed modes). Otherwise fall back to the magic
 * multiplier via replace_div_by_mulh() when the mode allows a Mulh. */
658 ir_node *arch_dep_replace_div_by_const(ir_node *irn)
662 /* If the architecture dependent optimizations were not initialized
663 or this optimization was not enabled. */
664 if (params == NULL || (opts & arch_dep_div_by_const) == 0)
667 if (get_irn_opcode(irn) == iro_Div) {
668 ir_node *c = get_Div_right(irn);
669 ir_node *block, *left;
/* Only constant divisors are handled. */
676 if (get_irn_op(c) != op_Const)
679 tv = get_Const_tarval(c);
681 /* check for division by zero */
682 if (classify_tarval(tv) == TV_CLASSIFY_NULL)
685 left = get_Div_left(irn);
686 mode = get_irn_mode(left);
687 block = get_irn_n(irn, -1);
688 dbg = get_irn_dbg_info(irn);
690 bits = get_mode_size_bits(mode);
694 if (mode_is_signed(mode)) {
695 /* for signed divisions, the algorithm works for a / -2^k by negating the result */
696 ntv = tarval_neg(tv);
706 if (k >= 0) { /* division by 2^k or -2^k */
707 if (mode_is_signed(mode)) {
709 ir_node *curr = left;
/* Signed rounding fixup: add (left >> (bits-k)) before the final Shrs
 * so negative dividends round toward zero. */
712 k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
713 curr = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
716 k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
717 curr = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
719 curr = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
721 k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
722 res = new_rd_Shrs(dbg, current_ir_graph, block, curr, k_node, mode);
724 if (n_flag) { /* negate the result */
727 k_node = new_r_Const(current_ir_graph, block, mode, get_mode_null(mode));
728 res = new_rd_Sub(dbg, current_ir_graph, block, k_node, res, mode);
731 else { /* unsigned case */
734 k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
735 res = new_rd_Shr(dbg, current_ir_graph, block, left, k_node, mode);
/* Not a power of two: try the magic-multiplier route. */
740 if (allow_Mulh(mode))
741 res = replace_div_by_mulh(irn, tv);
/* Statistics hook: a division was strength-reduced. */
746 hook_arch_dep_replace_division_by_const(irn);
751 /* Replace Mods with Shifts and Add/Subs and Mulh. */
/* For |divisor| == 2^k the remainder is computed with And/Sub; otherwise
 * the quotient is built via replace_div_by_mulh() and the remainder
 * recovered as left - q * c. */
752 ir_node *arch_dep_replace_mod_by_const(ir_node *irn)
756 /* If the architecture dependent optimizations were not initialized
757 or this optimization was not enabled. */
758 if (params == NULL || (opts & arch_dep_mod_by_const) == 0)
761 if (get_irn_opcode(irn) == iro_Mod) {
762 ir_node *c = get_Mod_right(irn);
763 ir_node *block, *left;
770 if (get_irn_op(c) != op_Const)
773 tv = get_Const_tarval(c);
775 /* check for division by zero */
776 if (classify_tarval(tv) == TV_CLASSIFY_NULL)
779 left = get_Mod_left(irn);
780 mode = get_irn_mode(left);
781 block = get_irn_n(irn, -1);
782 dbg = get_irn_dbg_info(irn);
783 bits = get_mode_size_bits(mode);
787 if (mode_is_signed(mode)) {
788 /* for signed divisions, the algorithm works for a / -2^k by negating the result */
789 ntv = tarval_neg(tv);
798 /* division by 2^k or -2^k:
799 * we use "modulus" here, so x % y == x % -y that's why is no difference between the case 2^k and -2^k
801 if (mode_is_signed(mode)) {
803 ir_node *curr = left;
/* Same rounding fixup as the Div case, then mask/subtract. */
806 k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
807 curr = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
810 k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
811 curr = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
813 curr = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
/* NOTE(review): (-1) << k left-shifts a negative value — undefined behavior
 * in C; prefer (long)(~0UL << k) or equivalent. Confirm against full file. */
815 k_node = new_r_Const_long(current_ir_graph, block, mode, (-1) << k);
816 curr = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
818 res = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
820 else { /* unsigned case */
/* Unsigned remainder is just a mask with 2^k - 1. */
823 k_node = new_r_Const_long(current_ir_graph, block, mode, (1 << k) - 1);
824 res = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
/* Not a power of two: remainder = left - (left / c) * c via Mulh. */
829 if (allow_Mulh(mode)) {
830 res = replace_div_by_mulh(irn, tv);
832 res = new_rd_Mul(dbg, current_ir_graph, block, res, c, mode);
834 /* res = arch_dep_mul_to_shift(res); */
836 res = new_rd_Sub(dbg, current_ir_graph, block, left, res, mode);
/* Statistics hook: a division was strength-reduced. */
842 hook_arch_dep_replace_division_by_const(irn);
847 /* Replace DivMods with Shifts and Add/Subs and Mulh. */
/* Combined form of the two functions above: both output pointers are filled.
 * Requires BOTH the div-by-const and mod-by-const optimizations enabled. */
848 void arch_dep_replace_divmod_by_const(ir_node **div, ir_node **mod, ir_node *irn)
852 /* If the architecture dependent optimizations were not initialized
853 or this optimization was not enabled. */
854 if (params == NULL ||
855 ((opts & (arch_dep_div_by_const|arch_dep_mod_by_const)) != (arch_dep_div_by_const|arch_dep_mod_by_const)))
858 if (get_irn_opcode(irn) == iro_DivMod) {
859 ir_node *c = get_DivMod_right(irn);
860 ir_node *block, *left;
867 if (get_irn_op(c) != op_Const)
870 tv = get_Const_tarval(c);
872 /* check for division by zero */
873 if (classify_tarval(tv) == TV_CLASSIFY_NULL)
876 left = get_DivMod_left(irn);
877 mode = get_irn_mode(left);
878 block = get_irn_n(irn, -1);
879 dbg = get_irn_dbg_info(irn);
881 bits = get_mode_size_bits(mode);
885 if (mode_is_signed(mode)) {
886 /* for signed divisions, the algorithm works for a / -2^k by negating the result */
887 ntv = tarval_neg(tv);
897 if (k >= 0) { /* division by 2^k or -2^k */
898 if (mode_is_signed(mode)) {
899 ir_node *k_node, *c_k;
900 ir_node *curr = left;
/* Shared rounding fixup feeds both the quotient shift and the mask. */
903 k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k - 1);
904 curr = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
907 k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, bits - k);
908 curr = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
910 curr = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
912 c_k = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
914 *div = new_rd_Shrs(dbg, current_ir_graph, block, curr, c_k, mode);
916 if (n_flag) { /* negate the div result */
919 k_node = new_r_Const(current_ir_graph, block, mode, get_mode_null(mode));
920 *div = new_rd_Sub(dbg, current_ir_graph, block, k_node, *div, mode);
/* NOTE(review): (-1) << k left-shifts a negative value — undefined behavior
 * in C; prefer (long)(~0UL << k) or equivalent. Confirm against full file. */
923 k_node = new_r_Const_long(current_ir_graph, block, mode, (-1) << k);
924 curr = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
926 *mod = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
928 else { /* unsigned case */
931 k_node = new_r_Const_long(current_ir_graph, block, mode_Iu, k);
932 *div = new_rd_Shr(dbg, current_ir_graph, block, left, k_node, mode);
934 k_node = new_r_Const_long(current_ir_graph, block, mode, (1 << k) - 1);
935 *mod = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
/* Not a power of two: quotient via Mulh, remainder as left - q * c. */
940 if (allow_Mulh(mode)) {
943 *div = replace_div_by_mulh(irn, tv);
945 t = new_rd_Mul(dbg, current_ir_graph, block, *div, c, mode);
947 /* t = arch_dep_mul_to_shift(t); */
949 *mod = new_rd_Sub(dbg, current_ir_graph, block, left, t, mode);
/* Statistics hook: a division was strength-reduced. */
955 hook_arch_dep_replace_division_by_const(irn);
/* Built-in parameter set returned by arch_dep_default_factory().
 * NOTE(review): the fields between 'maximum shift amount' and the Mulh bit
 * limit are not visible in this extract. */
959 static const arch_dep_params_t default_params = {
960 1, /* also use subs */
961 4, /* maximum shifts */
962 31, /* maximum shift amount */
966 32 /* Mulh allowed up to 32 bit */
969 /* A default parameter factory for testing purposes.
 * Returns a pointer to the static default_params above; the caller must
 * not free or modify it. */
970 const arch_dep_params_t *arch_dep_default_factory(void) {
971 return &default_params;