+/**
+ * creates a tarval from a condensed representation.
+ */
+static ir_tarval *condensed_to_value(mul_env *env, unsigned char *R, int r)
+{
+ ir_tarval *res, *tv;
+ int i, j;
+
+ j = 0;
+ tv = get_mode_one(env->mode);
+ res = NULL;
+ for (i = 0; i < r; ++i) {
+ j = R[i];
+ if (j) {
+ ir_tarval *t = new_tarval_from_long(j, mode_Iu);
+ tv = tarval_shl(tv, t);
+ }
+ res = res ? tarval_add(res, tv) : tv;
+ }
+ return res;
+}
+
+/* forward */
+static instruction *basic_decompose_mul(mul_env *env, unsigned char *R, int r, ir_tarval *N);
+
+/*
+ * handle simple cases with up-to 2 bits set
+ */
+static instruction *decompose_simple_cases(mul_env *env, unsigned char *R, int r, ir_tarval *N)
+{
+ instruction *ins, *ins2;
+
+ (void) N;
+ if (r == 1) {
+ return emit_SHIFT(env, env->root, R[0]);
+ } else {
+ assert(r == 2);
+
+ ins = env->root;
+ if (R[1] <= env->max_S) {
+ ins = emit_LEA(env, ins, ins, R[1]);
+ if (R[0] != 0) {
+ ins = emit_SHIFT(env, ins, R[0]);
+ }
+ return ins;
+ }
+ if (R[0] != 0) {
+ ins = emit_SHIFT(env, ins, R[0]);
+ }
+
+ ins2 = emit_SHIFT(env, env->root, R[0] + R[1]);
+ return emit_LEA(env, ins, ins2, 0);
+ }
+}
+
+/**
+ * Main decompose driver.
+ */
+static instruction *decompose_mul(mul_env *env, unsigned char *R, int r, ir_tarval *N)
+{
+ unsigned i;
+ int gain;
+
+ if (r <= 2)
+ return decompose_simple_cases(env, R, r, N);
+
+ if (env->params->also_use_subs) {
+ gain = calculate_gain(R, r);
+ if (gain > 0) {
+ instruction *instr1, *instr2;
+ unsigned char *R1, *R2;
+ int r1, r2, i, k, j;
+
+ R1 = complement_condensed(env, R, r, gain, &r1);
+ r2 = r - gain + 1;
+ R2 = (unsigned char*)obstack_alloc(&env->obst, r2);
+
+ k = 1;
+ for (i = 0; i < gain; ++i) {
+ k += R[i];
+ }
+ R2[0] = k;
+ R2[1] = R[gain] - 1;
+ j = 2;
+ if (R2[1] == 0) {
+ /* Two identical bits: normalize */
+ ++R2[0];
+ --j;
+ --r2;
+ }
+ for (i = gain + 1; i < r; ++i) {
+ R2[j++] = R[i];
+ }
+
+ instr1 = decompose_mul(env, R1, r1, NULL);
+ instr2 = decompose_mul(env, R2, r2, NULL);
+ return emit_SUB(env, instr2, instr1);
+ }
+ }
+
+ if (N == NULL)
+ N = condensed_to_value(env, R, r);
+
+ for (i = env->max_S; i > 0; --i) {
+ ir_tarval *div_res, *mod_res;
+ ir_tarval *tv = new_tarval_from_long((1 << i) + 1, env->mode);
+
+ div_res = tarval_divmod(N, tv, &mod_res);
+ if (mod_res == get_mode_null(env->mode)) {
+ unsigned char *Rs;
+ int rs;
+
+ Rs = value_to_condensed(env, div_res, &rs);
+ if (rs < r) {
+ instruction *N1 = decompose_mul(env, Rs, rs, div_res);
+ return emit_LEA(env, N1, N1, i);
+ }
+ }
+ }
+ return basic_decompose_mul(env, R, r, N);
+}
+
+#define IMAX(a,b) ((a) > (b) ? (a) : (b))
+
+/**
+ * basic decomposition routine
+ */
+static instruction *basic_decompose_mul(mul_env *env, unsigned char *R, int r, ir_tarval *N)
+{
+ instruction *Ns;
+ unsigned t;
+
+ if (R[0] == 0) { /* Case 1 */
+ t = R[1] > IMAX(env->max_S, R[1]);
+ R[1] -= t;
+ Ns = decompose_mul(env, &R[1], r - 1, N);
+ return emit_LEA(env, env->root, Ns, t);
+ } else if (R[0] <= env->max_S) { /* Case 2 */
+ t = R[0];
+ R[1] += t;
+ Ns = decompose_mul(env, &R[1], r - 1, N);
+ return emit_LEA(env, Ns, env->root, t);
+ } else {
+ t = R[0];
+ R[0] = 0;
+ Ns = decompose_mul(env, R, r, N);
+ return emit_SHIFT(env, Ns, t);
+ }
+}
+
+/**
+ * recursive build the graph form the instructions.
+ *
+ * @param env the environment
+ * @param inst the instruction
+ */
+static ir_node *build_graph(mul_env *env, instruction *inst)
+{
+ ir_node *l, *r, *c;
+ ir_graph *irg = env->irg;
+
+ if (inst->irn)
+ return inst->irn;
+
+ switch (inst->kind) {
+ case LEA:
+ l = build_graph(env, inst->in[0]);
+ r = build_graph(env, inst->in[1]);
+ c = new_r_Const_long(irg, env->shf_mode, inst->shift_count);
+ r = new_rd_Shl(env->dbg, env->blk, r, c, env->mode);
+ return inst->irn = new_rd_Add(env->dbg, env->blk, l, r, env->mode);
+ case SHIFT:
+ l = build_graph(env, inst->in[0]);
+ c = new_r_Const_long(irg, env->shf_mode, inst->shift_count);
+ return inst->irn = new_rd_Shl(env->dbg, env->blk, l, c, env->mode);
+ case SUB:
+ l = build_graph(env, inst->in[0]);
+ r = build_graph(env, inst->in[1]);
+ return inst->irn = new_rd_Sub(env->dbg, env->blk, l, r, env->mode);
+ case ADD:
+ l = build_graph(env, inst->in[0]);
+ r = build_graph(env, inst->in[1]);
+ return inst->irn = new_rd_Add(env->dbg, env->blk, l, r, env->mode);
+ case ZERO:
+ return inst->irn = new_r_Const(irg, get_mode_null(env->mode));
+ default:
+ panic("Unsupported instruction kind");
+ }
+}
+
+/**
+ * Calculate the costs for the given instruction sequence.
+ * Note that additional costs due to higher register pressure are NOT evaluated yet
+ */
+static int evaluate_insn(mul_env *env, instruction *inst)
+{
+ int costs;
+
+ if (inst->costs >= 0) {
+ /* was already evaluated */
+ return 0;
+ }
+
+ switch (inst->kind) {
+ case LEA:
+ case SUB:
+ case ADD:
+ costs = evaluate_insn(env, inst->in[0]);
+ costs += evaluate_insn(env, inst->in[1]);
+ costs += env->evaluate(inst->kind, env->mode, NULL);
+ inst->costs = costs;
+ return costs;
+ case SHIFT:
+ if (inst->shift_count > env->params->highest_shift_amount)
+ env->fail = 1;
+ if (env->n_shift <= 0)
+ env->fail = 1;
+ else
+ --env->n_shift;
+ costs = evaluate_insn(env, inst->in[0]);
+ costs += env->evaluate(inst->kind, env->mode, NULL);
+ inst->costs = costs;
+ return costs;
+ case ZERO:
+ inst->costs = costs = env->evaluate(inst->kind, env->mode, NULL);
+ return costs;
+ case MUL:
+ case ROOT:
+ break;
+ }
+ panic("Unsupported instruction kind");
+}
+
+/**
+ * Evaluate the replacement instructions and build a new graph
+ * if faster than the Mul.
+ * Returns the root of the new graph then or irn otherwise.
+ *
+ * @param irn the Mul operation
+ * @param operand the multiplication operand
+ * @param tv the multiplication constant
+ *
+ * @return the new graph
+ */
+static ir_node *do_decomposition(ir_node *irn, ir_node *operand, ir_tarval *tv)
+{
+ mul_env env;
+ instruction *inst;
+ unsigned char *R;
+ int r;
+ ir_node *res = irn;
+ int mul_costs;
+
+ obstack_init(&env.obst);
+ env.params = be_get_backend_param()->dep_param;
+ env.mode = get_tarval_mode(tv);
+ env.bits = (unsigned)get_mode_size_bits(env.mode);
+ env.max_S = 3;
+ env.root = emit_ROOT(&env, operand);
+ env.fail = 0;
+ env.n_shift = env.params->maximum_shifts;
+ env.evaluate = env.params->evaluate != NULL ? env.params->evaluate : default_evaluate;
+ env.irg = get_irn_irg(irn);
+
+ R = value_to_condensed(&env, tv, &r);
+ inst = decompose_mul(&env, R, r, tv);
+
+ /* the paper suggests 70% here */
+ mul_costs = (env.evaluate(MUL, env.mode, tv) * 7 + 5) / 10;
+ if (evaluate_insn(&env, inst) <= mul_costs && !env.fail) {
+ env.op = operand;
+ env.blk = get_nodes_block(irn);
+ env.dbg = get_irn_dbg_info(irn);
+ env.shf_mode = find_unsigned_mode(env.mode);
+ if (env.shf_mode == NULL)
+ env.shf_mode = mode_Iu;
+
+ res = build_graph(&env, inst);
+ }
+ obstack_free(&env.obst, NULL);
+ return res;
+}
+
+/* Replace Muls with Shifts and Add/Subs. */