/**
 * Unsigned division by constant d: calculate the Magic multiplier M and the shift amount s
 *
 * see Hacker's Delight: 10-10 Integer Division by Constants: Incorporation into a Compiler (Unsigned)
 */
static struct mu magicu(ir_tarval *d)
{
	ir_mode *mode = get_tarval_mode(d);
	int bits = get_mode_size_bits(mode);
	int p;                                  /* current exponent of the 2^p being divided */
	ir_tarval *nc, *delta, *q1, *r1, *q2, *r2;
	ir_tarval *bits_minus_1, *two_bits_1, *seven_ff;

	struct mu magu;                         /* result: M, s and the need_add indicator */

	tarval_int_overflow_mode_t rem = tarval_get_integer_overflow_mode();

	/* we need overflow mode to work correctly: the algorithm below relies on
	 * unsigned wrap-around semantics of the tarval arithmetic */
	tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);

	/* 2^(bits-1) and 2^(bits-1) - 1 as tarvals of the division's mode */
	bits_minus_1 = new_tarval_from_long(bits - 1, mode);
	two_bits_1 = SHL(get_mode_one(mode), bits_minus_1);
	seven_ff = SUB(two_bits_1, ONE(mode));

	magu.need_add = 0; /* initialize the add indicator */
	/* nc = largest unsigned value with rem(nc, d) == d - 1 (Hacker's Delight) */
	nc = SUB(NEG(ONE(mode)), MOD(NEG(d), d));
	p = bits - 1; /* Init: p */
	q1 = DIV(two_bits_1, nc); /* Init: q1 = 2^p/nc */
	r1 = SUB(two_bits_1, MUL(q1, nc)); /* Init: r1 = rem(2^p, nc) */
	q2 = DIV(seven_ff, d); /* Init: q2 = (2^p - 1)/d */
	r2 = SUB(seven_ff, MUL(q2, d)); /* Init: r2 = rem(2^p - 1, d) */

	/* Each iteration doubles 2^p; quotients/remainders are updated
	 * incrementally.  Comparisons test "r >= complement" instead of
	 * "2*r >= limit" so they stay correct despite wrap-around. */
	do {
		++p;
		if (CMP(r1, SUB(nc, r1)) & ir_relation_greater_equal) {
			/* 2*r1 would reach nc: carry into q1 */
			q1 = ADD(ADD(q1, q1), ONE(mode));
			r1 = SUB(ADD(r1, r1), nc);
		}
		else {
			q1 = ADD(q1, q1);
			r1 = ADD(r1, r1);
		}

		if (CMP(ADD(r2, ONE(mode)), SUB(d, r2)) & ir_relation_greater_equal) {
			/* 2*r2 + 1 would reach d: carry into q2 */
			if (CMP(q2, seven_ff) & ir_relation_greater_equal)
				magu.need_add = 1;      /* magic number M overflows: Mulh result needs an extra add of n */

			q2 = ADD(ADD(q2, q2), ONE(mode));
			r2 = SUB(ADD(ADD(r2, r2), ONE(mode)), d);
		}
		else {
			if (CMP(q2, two_bits_1) & ir_relation_greater_equal)
				magu.need_add = 1;      /* see above: M will not fit without the add scheme */

			q2 = ADD(q2, q2);
			r2 = ADD(ADD(r2, r2), ONE(mode));
		}
		delta = SUB(SUB(d, ONE(mode)), r2);
	} while (p < 2*bits &&
		(CMP(q1, delta) & ir_relation_less || (CMP(q1, delta) & ir_relation_equal && CMP(r1, ZERO(mode)) & ir_relation_equal)));
	/* loop until q1 >= delta (and not exactly equal with r1 == 0), i.e. the
	 * approximation is precise enough; p is bounded by 2*bits */

	magu.M = ADD(q2, ONE(mode)); /* Magic number */
	magu.s = p - bits; /* and shift amount */

	/* restore the caller's overflow mode */
	tarval_set_integer_overflow_mode(rem);

	return magu;
}
+
+/**
+ * Build the Mulh replacement code for n / tv.
+ *
+ * Note that 'div' might be a Mod operation as well
+ */
+static ir_node *replace_div_by_mulh(ir_node *div, ir_tarval *tv)
+{
+ dbg_info *dbg = get_irn_dbg_info(div);
+ ir_node *n = get_binop_left(div);
+ ir_node *block = get_irn_n(div, -1);
+ ir_mode *mode = get_irn_mode(n);
+ int bits = get_mode_size_bits(mode);
+ ir_node *q, *t, *c;
+
+ /* Beware: do not transform bad code */
+ if (is_Bad(n) || is_Bad(block))
+ return div;
+
+ if (mode_is_signed(mode)) {
+ ir_graph *irg = get_irn_irg(div);
+ struct ms mag = magic(tv);
+
+ /* generate the Mulh instruction */
+ c = new_r_Const(irg, mag.M);
+ q = new_rd_Mulh(dbg, block, n, c, mode);
+
+ /* do we need an Add or Sub */
+ if (mag.need_add)
+ q = new_rd_Add(dbg, block, q, n, mode);
+ else if (mag.need_sub)
+ q = new_rd_Sub(dbg, block, q, n, mode);
+
+ /* Do we need the shift */
+ if (mag.s > 0) {
+ c = new_r_Const_long(irg, mode_Iu, mag.s);
+ q = new_rd_Shrs(dbg, block, q, c, mode);
+ }
+
+ /* final */
+ c = new_r_Const_long(irg, mode_Iu, bits - 1);
+ t = new_rd_Shr(dbg, block, q, c, mode);
+
+ q = new_rd_Add(dbg, block, q, t, mode);
+ } else {
+ struct mu mag = magicu(tv);
+ ir_node *c;
+ ir_graph *irg = get_irn_irg(div);
+
+ /* generate the Mulh instruction */
+ c = new_r_Const(irg, mag.M);
+ q = new_rd_Mulh(dbg, block, n, c, mode);
+
+ if (mag.need_add) {
+ if (mag.s > 0) {
+ /* use the GM scheme */
+ t = new_rd_Sub(dbg, block, n, q, mode);
+
+ c = new_r_Const(irg, get_mode_one(mode_Iu));
+ t = new_rd_Shr(dbg, block, t, c, mode);
+
+ t = new_rd_Add(dbg, block, t, q, mode);
+
+ c = new_r_Const_long(irg, mode_Iu, mag.s - 1);
+ q = new_rd_Shr(dbg, block, t, c, mode);
+ } else {
+ /* use the default scheme */
+ q = new_rd_Add(dbg, block, q, n, mode);
+ }
+ } else if (mag.s > 0) { /* default scheme, shift needed */
+ c = new_r_Const_long(irg, mode_Iu, mag.s);
+ q = new_rd_Shr(dbg, block, q, c, mode);
+ }
+ }
+ return q;
+}
+
+/* Replace Divs with Shifts and Add/Subs and Mulh. */
+ir_node *arch_dep_replace_div_by_const(ir_node *irn)
+{
+ const ir_settings_arch_dep_t *params = be_get_backend_param()->dep_param;
+ ir_node *res = irn;
+
+ /* If the architecture dependent optimizations were not initialized
+ or this optimization was not enabled. */
+ if (params == NULL || (opts & arch_dep_div_by_const) == 0)
+ return irn;
+
+ if (is_Div(irn)) {
+ ir_node *c = get_Div_right(irn);
+ ir_node *block, *left;
+ ir_mode *mode;
+ ir_tarval *tv, *ntv;
+ dbg_info *dbg;
+ int n, bits;
+ int k;
+ int n_flag = 0;
+
+ if (! is_Const(c))
+ return irn;
+
+ tv = get_Const_tarval(c);
+
+ /* check for division by zero */
+ if (tarval_is_null(tv))
+ return irn;
+
+ left = get_Div_left(irn);
+ mode = get_irn_mode(left);
+
+ /* can only handle integer Div's */
+ if (!mode_is_int(mode))
+ return irn;
+
+ block = get_irn_n(irn, -1);
+ dbg = get_irn_dbg_info(irn);
+
+ bits = get_mode_size_bits(mode);
+ n = (bits + 7) / 8;
+
+ k = -1;
+ if (mode_is_signed(mode)) {
+ /* for signed divisions, the algorithm works for a / -2^k by negating the result */
+ ntv = tarval_neg(tv);
+ n_flag = 1;
+ k = tv_ld2(ntv, n);
+ }
+
+ if (k < 0) {
+ n_flag = 0;
+ k = tv_ld2(tv, n);
+ }
+
+ if (k >= 0) { /* division by 2^k or -2^k */
+ ir_graph *irg = get_irn_irg(irn);
+ if (mode_is_signed(mode)) {
+ ir_node *k_node;
+ ir_node *curr = left;
+
+ /* create the correction code for signed values only if there might be a remainder */
+ if (! get_Div_no_remainder(irn)) {
+ if (k != 1) {
+ k_node = new_r_Const_long(irg, mode_Iu, k - 1);
+ curr = new_rd_Shrs(dbg, block, left, k_node, mode);
+ }
+
+ k_node = new_r_Const_long(irg, mode_Iu, bits - k);
+ curr = new_rd_Shr(dbg, block, curr, k_node, mode);
+
+ curr = new_rd_Add(dbg, block, left, curr, mode);
+ } else {
+ k_node = left;
+ }
+
+ k_node = new_r_Const_long(irg, mode_Iu, k);
+ res = new_rd_Shrs(dbg, block, curr, k_node, mode);
+
+ if (n_flag) { /* negate the result */
+ ir_node *k_node;
+
+ k_node = new_r_Const(irg, get_mode_null(mode));
+ res = new_rd_Sub(dbg, block, k_node, res, mode);
+ }
+ } else { /* unsigned case */
+ ir_node *k_node;
+
+ k_node = new_r_Const_long(irg, mode_Iu, k);
+ res = new_rd_Shr(dbg, block, left, k_node, mode);
+ }
+ } else {
+ /* other constant */
+ if (allow_Mulh(params, mode))
+ res = replace_div_by_mulh(irn, tv);
+ }
+ }
+
+ if (res != irn)
+ hook_arch_dep_replace_division_by_const(irn);
+
+ return res;
+}
+
+/* Replace Mods with Shifts and Add/Subs and Mulh. */
+ir_node *arch_dep_replace_mod_by_const(ir_node *irn)
+{
+ const ir_settings_arch_dep_t *params = be_get_backend_param()->dep_param;
+ ir_node *res = irn;
+
+ /* If the architecture dependent optimizations were not initialized
+ or this optimization was not enabled. */
+ if (params == NULL || (opts & arch_dep_mod_by_const) == 0)
+ return irn;
+
+ if (is_Mod(irn)) {
+ ir_node *c = get_Mod_right(irn);
+ ir_node *block, *left;
+ ir_mode *mode;
+ ir_tarval *tv, *ntv;
+ dbg_info *dbg;
+ int n, bits;
+ int k;
+
+ if (! is_Const(c))
+ return irn;
+
+ tv = get_Const_tarval(c);
+
+ /* check for division by zero */
+ if (tarval_is_null(tv))
+ return irn;
+
+ left = get_Mod_left(irn);
+ mode = get_irn_mode(left);
+ block = get_irn_n(irn, -1);
+ dbg = get_irn_dbg_info(irn);
+ bits = get_mode_size_bits(mode);
+ n = (bits + 7) / 8;
+
+ k = -1;
+ if (mode_is_signed(mode)) {
+ /* for signed divisions, the algorithm works for a / -2^k by negating the result */
+ ntv = tarval_neg(tv);
+ k = tv_ld2(ntv, n);
+ }
+
+ if (k < 0) {
+ k = tv_ld2(tv, n);
+ }
+
+ if (k >= 0) {
+ ir_graph *irg = get_irn_irg(irn);
+ /* division by 2^k or -2^k:
+ * we use "modulus" here, so x % y == x % -y that's why is no difference between the case 2^k and -2^k
+ */
+ if (mode_is_signed(mode)) {
+ ir_node *k_node;
+ ir_node *curr = left;
+
+ if (k != 1) {
+ k_node = new_r_Const_long(irg, mode_Iu, k - 1);
+ curr = new_rd_Shrs(dbg, block, left, k_node, mode);
+ }
+
+ k_node = new_r_Const_long(irg, mode_Iu, bits - k);
+ curr = new_rd_Shr(dbg, block, curr, k_node, mode);
+
+ curr = new_rd_Add(dbg, block, left, curr, mode);
+
+ k_node = new_r_Const_long(irg, mode, (-1) << k);
+ curr = new_rd_And(dbg, block, curr, k_node, mode);
+
+ res = new_rd_Sub(dbg, block, left, curr, mode);
+ } else { /* unsigned case */
+ ir_node *k_node;
+
+ k_node = new_r_Const_long(irg, mode, (1 << k) - 1);
+ res = new_rd_And(dbg, block, left, k_node, mode);
+ }
+ } else {
+ /* other constant */
+ if (allow_Mulh(params, mode)) {
+ res = replace_div_by_mulh(irn, tv);
+
+ res = new_rd_Mul(dbg, block, res, c, mode);
+
+ /* res = arch_dep_mul_to_shift(res); */
+
+ res = new_rd_Sub(dbg, block, left, res, mode);
+ }
+ }
+ }
+
+ if (res != irn)
+ hook_arch_dep_replace_division_by_const(irn);
+
+ return res;