+/**
+ * recursive build the graph form the instructions.
+ *
+ * @param env the environment
+ * @param inst the instruction
+ */
+static ir_node *build_graph(mul_env *env, instruction *inst) {
+ ir_node *l, *r, *c;
+
+ if (inst->irn)
+ return inst->irn;
+
+ switch (inst->kind) {
+ case LEA:
+ l = build_graph(env, inst->in[0]);
+ r = build_graph(env, inst->in[1]);
+ c = new_Const_long(env->shf_mode, inst->shift_count);
+ r = new_rd_Shl(env->dbg, current_ir_graph, env->blk, r, c, env->mode);
+ return inst->irn = new_rd_Add(env->dbg, current_ir_graph, env->blk, l, r, env->mode);
+ case SHIFT:
+ l = build_graph(env, inst->in[0]);
+ c = new_Const_long(env->shf_mode, inst->shift_count);
+ return inst->irn = new_rd_Shl(env->dbg, current_ir_graph, env->blk, l, c, env->mode);
+ case SUB:
+ l = build_graph(env, inst->in[0]);
+ r = build_graph(env, inst->in[1]);
+ return inst->irn = new_rd_Sub(env->dbg, current_ir_graph, env->blk, l, r, env->mode);
+ case ADD:
+ l = build_graph(env, inst->in[0]);
+ r = build_graph(env, inst->in[1]);
+ return inst->irn = new_rd_Add(env->dbg, current_ir_graph, env->blk, l, r, env->mode);
+ case ZERO:
+ return inst->irn = new_Const(get_mode_null(env->mode));
+ default:
+ panic("Unsupported instruction kind");
+ return NULL;
+ }
+}
+
+/**
+ * Calculate the costs for the given instruction sequence.
+ * Note that additional costs due to higher register pressure are NOT evaluated yet
+ */
+static int evaluate_insn(mul_env *env, instruction *inst) {
+ int costs;
+
+ if (inst->costs >= 0) {
+ /* was already evaluated */
+ return 0;
+ }
+
+ switch (inst->kind) {
+ case LEA:
+ case SUB:
+ case ADD:
+ costs = evaluate_insn(env, inst->in[0]);
+ costs += evaluate_insn(env, inst->in[1]);
+ costs += env->evaluate(inst->kind, NULL);
+ inst->costs = costs;
+ return costs;
+ case SHIFT:
+ if (inst->shift_count > params->highest_shift_amount)
+ env->fail = 1;
+ if (env->n_shift <= 0)
+ env->fail = 1;
+ else
+ --env->n_shift;
+ costs = evaluate_insn(env, inst->in[0]);
+ costs += env->evaluate(inst->kind, NULL);
+ inst->costs = costs;
+ return costs;
+ case ZERO:
+ inst->costs = costs = env->evaluate(inst->kind, NULL);
+ return costs;
+ case MUL:
+ case ROOT:
+ break;
+ }
+ panic("Unsupported instruction kind");
+}
+
+/**
+ * Evaluate the replacement instructions and build a new graph
+ * if faster than the Mul.
+ * Returns the root of the new graph then or irn otherwise.
+ *
+ * @param irn the Mul operation
+ * @param operand the multiplication operand
+ * @param tv the multiplication constant
+ *
+ * @return the new graph
+ */
+static ir_node *do_decomposition(ir_node *irn, ir_node *operand, tarval *tv) {
+ mul_env env;
+ instruction *inst;
+ unsigned char *R;
+ int r;
+ ir_node *res = irn;
+ int mul_costs;
+
+ obstack_init(&env.obst);
+ env.mode = get_tarval_mode(tv);
+ env.bits = (unsigned)get_mode_size_bits(env.mode);
+ env.max_S = 3;
+ env.root = emit_ROOT(&env, operand);
+ env.fail = 0;
+ env.n_shift = params->maximum_shifts;
+ env.evaluate = params->evaluate != NULL ? params->evaluate : default_evaluate;
+
+ R = value_to_condensed(&env, tv, &r);
+ inst = decompose_mul(&env, R, r, tv);
+
+ /* the paper suggests 70% here */
+ mul_costs = (env.evaluate(MUL, tv) * 7 + 5) / 10;
+ if (evaluate_insn(&env, inst) <= mul_costs && !env.fail) {
+ env.op = operand;
+ env.blk = get_nodes_block(irn);
+ env.dbg = get_irn_dbg_info(irn);
+ env.shf_mode = find_unsigned_mode(env.mode);
+ if (env.shf_mode == NULL)
+ env.shf_mode = mode_Iu;
+
+ res = build_graph(&env, inst);
+ }
+ obstack_free(&env.obst, NULL);
+ return res;
+}
+
+/* Replace Muls with Shifts and Add/Subs. */
+ir_node *arch_dep_replace_mul_with_shifts(ir_node *irn) {
+ ir_node *res = irn;
+ ir_mode *mode = get_irn_mode(irn);
+
+ /* If the architecture dependent optimizations were not initialized
+ or this optimization was not enabled. */
+ if (params == NULL || (opts & arch_dep_mul_to_shift) == 0)
+ return irn;
+
+ set_arch_dep_running(1);
+ {
+ if (is_Mul(irn) && mode_is_int(mode)) {
+ ir_node *left = get_binop_left(irn);
+ ir_node *right = get_binop_right(irn);
+ tarval *tv = NULL;
+ ir_node *operand = NULL;
+
+ /* Look, if one operand is a constant. */
+ if (is_Const(left)) {
+ tv = get_Const_tarval(left);
+ operand = right;
+ } else if (is_Const(right)) {
+ tv = get_Const_tarval(right);
+ operand = left;
+ }
+
+ if (tv != NULL) {
+ res = do_decomposition(irn, operand, tv);
+
+ if (res != irn) {
+ hook_arch_dep_replace_mul_with_shifts(irn);
+ exchange(irn, res);
+ }
+ }
+ }
+ }
+ //set_arch_dep_running(0);
+
+ return res;
+}
+
+/**
+ * calculated the ld2 of a tarval if tarval is 2^n, else returns -1.
+ */
+static int tv_ld2(tarval *tv, int bits) {
+ int i, k = 0, num;
+
+ for (num = i = 0; i < bits; ++i) {
+ unsigned char v = get_tarval_sub_bits(tv, i);
+
+ if (v) {
+ int j;
+
+ for (j = 0; j < 8; ++j)
+ if ((1 << j) & v) {
+ ++num;
+ k = 8 * i + j;
+ }
+ }
+ }
+ if (num == 1)
+ return k;
+ return -1;
+}
+
+
+/* for shorter lines */
+#define ABS(a) tarval_abs(a)
+#define NEG(a) tarval_neg(a)
+#define NOT(a) tarval_not(a)
+#define SHL(a, b) tarval_shl(a, b)
+#define SHR(a, b) tarval_shr(a, b)
+#define ADD(a, b) tarval_add(a, b)
+#define SUB(a, b) tarval_sub(a, b, NULL)
+#define MUL(a, b) tarval_mul(a, b)
+#define DIV(a, b) tarval_div(a, b)
+#define MOD(a, b) tarval_mod(a, b)
+#define CMP(a, b) tarval_cmp(a, b)
+#define CNV(a, m) tarval_convert_to(a, m)
+#define ONE(m) get_mode_one(m)
+#define ZERO(m) get_mode_null(m)
+
+/** The result of a the magic() function. */
+struct ms {
+ tarval *M; /**< magic number */
+ int s; /**< shift amount */
+ int need_add; /**< an additional add is needed */
+ int need_sub; /**< an additional sub is needed */
+};
+
+/**
+ * Signed division by constant d: calculate the Magic multiplier M and the shift amount s
+ *
+ * see Hacker's Delight: 10-6 Integer Division by Constants: Incorporation into a Compiler
+ */
+static struct ms magic(tarval *d) {
+ ir_mode *mode = get_tarval_mode(d);
+ ir_mode *u_mode = find_unsigned_mode(mode);
+ int bits = get_mode_size_bits(u_mode);
+ int p;
+ tarval *ad, *anc, *delta, *q1, *r1, *q2, *r2, *t; /* unsigned */
+ pn_Cmp d_cmp, M_cmp;