4 * @author Sebastian Hack
5 * @brief Machine dependent firm optimizations.
13 #include "irgraph_t.h"
20 #include "dbginfo_t.h"
21 #include "iropt_dbg.h"
32 /** The params got from the factory in arch_dep_init(...). */
33 static const arch_dep_params_t *params = NULL;
35 /** The bit mask, which optimizations to apply. */
/* NOTE(review): static storage duration => zero-initialized, i.e. no
 * optimization is enabled until arch_dep_set_opts() is called. */
36 static arch_dep_opts_t opts;
/**
 * Initialize the machine-dependent optimizations with the parameter set
 * produced by the given factory (stored in the file-local 'params').
 * Body elided in this view — TODO confirm it also handles a NULL factory.
 */
38 void arch_dep_init(arch_dep_params_factory_t factory)
/** Set the bit mask of arch_dep_opts_t flags selecting which of the
 *  replacements below are applied. */
46 void arch_dep_set_opts(arch_dep_opts_t the_opts) {
/**
 * Replace an integer Mul by a constant with an equivalent sequence of
 * shifts and adds/subs, if 'params' permits it (maximum_shifts,
 * highest_shift_amount, also_use_subs).  Returns the replacement node,
 * or presumably the original Mul when no replacement is done — body is
 * partially elided in this view, verify against the full file.
 */
50 ir_node *arch_dep_replace_mul_with_shifts(ir_node *irn)
53 ir_mode *mode = get_irn_mode(irn);
55 /* If the architecture dependent optimizations were not initialized
56 or this optimization was not enabled. */
57 if(params == NULL || (opts & arch_dep_mul_to_shift) == 0)
60 if(get_irn_opcode(irn) == iro_Mul && mode_is_int(mode)) {
61 ir_node *block = get_nodes_block(irn);
62 ir_node *left = get_binop_left(irn);
63 ir_node *right = get_binop_right(irn);
65 ir_node *operand = NULL;
67 /* Look, if one operand is a constant. */
68 if(get_irn_opcode(left) == iro_Const) {
69 tv = get_Const_tarval(left);
71 } else if(get_irn_opcode(right) == iro_Const) {
72 tv = get_Const_tarval(right);
77 int maximum_shifts = params->maximum_shifts;
78 int also_use_subs = params->also_use_subs;
79 int highest_shift_amount = params->highest_shift_amount;
/* Bit pattern of the constant as a character string; 'compr' below
 * stores a run-length encoding of alternating 1-runs and 0-runs. */
81 char *bitstr = get_tarval_bitpattern(tv);
87 char compr[MAX_BITSTR];
91 int shift_with_sub[MAX_BITSTR] = { 0 };
92 int shift_without_sub[MAX_BITSTR] = { 0 };
93 int shift_with_sub_pos = 0;
94 int shift_without_sub_pos = 0;
/* NOTE(review): the stderr output here and below is debug tracing —
 * presumably guarded by a DEBUG conditional in the elided lines; confirm. */
98 long val = get_tarval_long(tv);
99 fprintf(stderr, "Found mul with %ld(%lx) = ", val, val);
100 for(p = bitstr; *p != '\0'; p++)
/* Build the run-length encoding: push the run length each time the
 * bit value flips (see comment below). */
106 for(p = bitstr; *p != '\0'; p++) {
110 /* The last was 1 we are now at 0 OR
111 * The last was 0 and we are now at 1 */
112 compr[compr_len++] = counter;
120 compr[compr_len++] = counter;
125 const char *prefix = "";
126 for(i = 0; i < compr_len; i++, prefix = ",")
127 fprintf(stderr, "%s%d", prefix, compr[i]);
132 // Go over all recorded one groups.
/* compr[even] = 1-run lengths, compr[odd] = 0-run lengths (or vice
 * versa) — TODO confirm parity against the elided encoding loop. */
135 for(i = 1; i < compr_len; i = end_of_group + 2) {
136 int j, zeros_in_group, ones_in_group;
138 ones_in_group = compr[i];
141 // Scan for singular 0s in a sequence
142 for(j = i + 1; j < compr_len && compr[j] == 1; j += 2) {
144 ones_in_group += (j + 1 < compr_len ? compr[j + 1] : 0);
146 end_of_group = j - 1;
/* A run of k ones costs one sub + one add (2 shifts) in the sub
 * variant, vs k shifts in the add-only variant. */
148 if(zeros_in_group >= ones_in_group - 1)
152 fprintf(stderr, " i:%d, eg:%d\n", i, end_of_group);
155 singleton = compr[i] == 1 && i == end_of_group;
156 for(j = i; j <= end_of_group; j += 2) {
157 int curr_ones = compr[j];
/* Shift amounts are stored biased by +1 so that a shift of 0 can
 * still carry a sign (negative = subtract this term). */
158 int biased_curr_bit = curr_bit + 1;
162 fprintf(stderr, " j:%d, ones:%d\n", j, curr_ones);
165 // If this ones group is a singleton group (it has no
166 // singleton zeros inside
168 shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
170 shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
172 for(k = 0; k < curr_ones; k++)
173 shift_without_sub[shift_without_sub_pos++] = biased_curr_bit + k;
175 curr_bit += curr_ones;
176 biased_curr_bit = curr_bit + 1;
178 if(!singleton && j == end_of_group)
179 shift_with_sub[shift_with_sub_pos++] = biased_curr_bit;
180 else if(j != end_of_group)
181 shift_with_sub[shift_with_sub_pos++] = -biased_curr_bit;
183 curr_bit += compr[j + 1];
/* Pick the cheaper of the two shift sequences. */
189 int *shifts = shift_with_sub;
190 int n = shift_with_sub_pos;
191 int highest_shift_wide = 0;
192 int highest_shift_seq = 0;
195 /* If we may not use subs, or we can achive the same with adds,
197 if(!also_use_subs || shift_with_sub_pos >= shift_without_sub_pos) {
198 shifts = shift_without_sub;
199 n = shift_without_sub_pos;
202 /* If the number of needed shifts exceeds the given maximum,
203 use the Mul and exit. */
204 if(n > maximum_shifts) {
206 fprintf(stderr, "Only allowed %d shifts, but %d are needed\n",
212 /* Compute the highest shift needed for both, the
213 sequential and wide representations. */
214 for(i = 0; i < n; i++) {
215 int curr = abs(shifts[i]);
216 int curr_seq = curr - last;
218 highest_shift_wide = curr > highest_shift_wide ? curr
219 : highest_shift_wide;
220 highest_shift_seq = curr_seq > highest_shift_seq ? curr_seq
226 /* If the highest shift amount is greater than the given limit,
228 if(highest_shift_seq > highest_shift_amount) {
230 fprintf(stderr, "Shift argument %d exceeds maximum %d\n",
231 highest_shift_seq, highest_shift_amount);
236 /* If we have subs, we cannot do sequential. */
/* NOTE(review): dead condition — the literal 1 makes this branch
 * unconditional, so the sequential ("wide"-free) code path can never
 * run.  Either restore 'also_use_subs' here or delete the dead path. */
237 if(1 /* also_use_subs */) {
239 ir_node *curr = NULL;
244 int curr_shift = shifts[i];
245 int sub = curr_shift < 0;
/* Undo the +1 bias applied when the shift list was built. */
246 int amount = abs(curr_shift) - 1;
247 ir_node *aux = operand;
250 assert(amount >= 0 && "What is a negative shift??");
253 tarval *shift_amount = new_tarval_from_long(amount, mode_Iu);
254 ir_node *cnst = new_r_Const(current_ir_graph, block, mode_Iu, shift_amount);
255 aux = new_r_Shl(current_ir_graph, block, operand, cnst, mode);
260 curr = new_r_Sub(current_ir_graph, block, curr, aux, mode);
262 curr = new_r_Add(current_ir_graph, block, curr, aux, mode);
274 const char *prefix = "";
275 for(i = 0; i < n; i++) {
276 fprintf(stderr, "%s%d", prefix, shifts[i]);
279 fprintf(stderr, "\n");
/* Statistics hook: record that a Mul was strength-reduced. */
292 stat_arch_dep_replace_mul_with_shifts(irn);
/**
 * Replace a Div by a power-of-two constant with shift operations.
 * Signed dividends get the usual round-toward-zero correction before the
 * arithmetic shift.  Body partially elided in this view.
 */
297 ir_node *arch_dep_replace_div_with_shifts(ir_node *irn)
301 /* If the architecture dependent optimizations were not initialized
302 or this optimization was not enabled. */
303 if (params == NULL || (opts & arch_dep_div_to_shift) == 0)
306 if (get_irn_opcode(irn) == iro_Div) {
307 ir_node *c = get_Div_right(irn);
308 ir_node *block, *left;
315 if (get_irn_op(c) != op_Const)
318 left = get_Div_left(irn);
319 mode = get_irn_mode(left);
320 block = get_nodes_block(irn);
321 dbg = get_irn_dbg_info(irn);
322 tv = get_Const_tarval(c);
324 bits = get_mode_size_bits(mode);
/* Count the set bits of the divisor; num == 1 afterwards means the
 * divisor is exactly 2^k (k presumably recorded in the elided body). */
327 for (num = i = 0; i < n; ++i) {
328 unsigned char v = get_tarval_sub_bits(tv, i);
333 for (j = 0; j < 8; ++j)
341 if (num == 1) { /* division by 2^k */
343 if (mode_is_signed(mode)) {
345 ir_node *curr = left;
/* Build sign-correction: derive a bias from the sign bits and add it
 * to the dividend so the final arithmetic shift rounds toward zero.
 * NOTE(review): verify the shift amounts (k-1 then bits-k) against the
 * standard signed-division-by-2^k idiom. */
348 k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k - 1, mode_Iu));
349 curr = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
352 k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(bits - k, mode_Iu));
353 curr = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
355 curr = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
357 k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k, mode_Iu));
358 res = new_rd_Shrs(dbg, current_ir_graph, block, curr, k_node, mode);
360 else { /* unsigned case */
363 k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k, mode_Iu));
/* BUG(review): unsigned division by 2^k must be a logical RIGHT shift
 * (new_rd_Shr); new_rd_Shl computes left * 2^k instead.  Fix. */
364 res = new_rd_Shl(dbg, current_ir_graph, block, left, k_node, mode);
/* Statistics hook: record that a Div was strength-reduced. */
370 stat_arch_dep_replace_div_with_shifts(irn);
/**
 * Replace a Mod by a power-of-two constant with And/shift operations.
 * Signed case computes left - (sign-corrected(left) & -2^k); unsigned
 * case is a simple mask.  Body partially elided in this view.
 */
375 ir_node *arch_dep_replace_mod_with_shifts(ir_node *irn)
379 /* If the architecture dependent optimizations were not initialized
380 or this optimization was not enabled. */
381 if (params == NULL || (opts & arch_dep_mod_to_shift) == 0)
384 if (get_irn_opcode(irn) == iro_Mod) {
385 ir_node *c = get_Mod_right(irn);
386 ir_node *block, *left;
393 if (get_irn_op(c) != op_Const)
396 left = get_Mod_left(irn);
397 mode = get_irn_mode(left);
398 block = get_nodes_block(irn);
399 dbg = get_irn_dbg_info(irn);
400 tv = get_Const_tarval(c);
402 bits = get_mode_size_bits(mode);
/* Count set bits; num == 1 means the divisor is 2^k. */
405 for (num = i = 0; i < n; ++i) {
406 unsigned char v = get_tarval_sub_bits(tv, i);
411 for (j = 0; j < 8; ++j)
419 if (num == 1) { /* remainder by 2^k */
421 if (mode_is_signed(mode)) {
423 ir_node *curr = left;
/* Same sign-correction sequence as in the Div replacement. */
426 k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k - 1, mode_Iu));
427 curr = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
430 k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(bits - k, mode_Iu));
431 curr = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
433 curr = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
/* BUG(review, UB): '(-1) << k' left-shifts a negative value — undefined
 * behavior in C.  Build the mask as e.g. -(1L << k) via unsigned math or
 * tarval ops (tarval_shl) instead. */
435 k_node = new_r_Const(current_ir_graph, block, mode, new_tarval_from_long((-1) << k, mode));
436 curr = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
438 res = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
440 else { /* unsigned case */
/* NOTE(review): '1 << k' overflows (UB) when k == bits-1 for int-sized
 * modes; use an unsigned/long shift or tarval arithmetic. */
443 k_node = new_r_Const(current_ir_graph, block, mode, new_tarval_from_long((1 << k) - 1, mode));
444 res = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
/* Statistics hook: record that a Mod was strength-reduced. */
450 stat_arch_dep_replace_mod_with_shifts(irn);
/**
 * Replace a DivMod by a power-of-two constant, producing the quotient in
 * *div and the remainder in *mod.  Combines the Div and Mod replacements
 * above.  Body partially elided in this view.
 */
455 void arch_dep_replace_divmod_with_shifts(ir_node **div, ir_node **mod, ir_node *irn)
459 /* If the architecture dependent optimizations were not initialized
460 or this optimization was not enabled. */
/* BUG(review): operator precedence — '!=' binds tighter than '&', so this
 * parses as 'opts & ((A|B) != (A|B))' == 'opts & 0' == 0, and the opts
 * check is dead: only the params test guards this function.  It should be
 * '(opts & (A|B)) != (A|B)'. */
461 if (params == NULL ||
462 (opts & (arch_dep_div_to_shift|arch_dep_mod_to_shift) != (arch_dep_div_to_shift|arch_dep_mod_to_shift)))
465 if (get_irn_opcode(irn) == iro_DivMod) {
466 ir_node *c = get_DivMod_right(irn);
467 ir_node *block, *left;
474 if (get_irn_op(c) != op_Const)
477 left = get_DivMod_left(irn);
478 mode = get_irn_mode(left);
479 block = get_nodes_block(irn);
480 dbg = get_irn_dbg_info(irn);
481 tv = get_Const_tarval(c);
483 bits = get_mode_size_bits(mode);
/* Count set bits; num == 1 means the divisor is 2^k. */
486 for (num = i = 0; i < n; ++i) {
487 unsigned char v = get_tarval_sub_bits(tv, i);
492 for (j = 0; j < 8; ++j)
500 if (num == 1) { /* division & remainder by 2^k */
502 if (mode_is_signed(mode)) {
504 ir_node *curr = left;
/* Sign-correction shared by quotient and remainder, as in the Div/Mod
 * replacements above. */
507 k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k - 1, mode_Iu));
508 curr = new_rd_Shrs(dbg, current_ir_graph, block, left, k_node, mode);
511 k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(bits - k, mode_Iu));
512 curr = new_rd_Shr(dbg, current_ir_graph, block, curr, k_node, mode);
514 curr = new_rd_Add(dbg, current_ir_graph, block, left, curr, mode);
516 k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k, mode_Iu));
517 *div = new_rd_Shrs(dbg, current_ir_graph, block, curr, k_node, mode);
/* BUG(review, UB): '(-1) << k' left-shifts a negative value — undefined
 * behavior; build the mask with unsigned/tarval arithmetic. */
519 k_node = new_r_Const(current_ir_graph, block, mode, new_tarval_from_long((-1) << k, mode));
520 curr = new_rd_And(dbg, current_ir_graph, block, curr, k_node, mode);
522 *mod = new_rd_Sub(dbg, current_ir_graph, block, left, curr, mode);
524 else { /* unsigned case */
527 k_node = new_r_Const(current_ir_graph, block, mode_Iu, new_tarval_from_long(k, mode_Iu));
/* BUG(review): unsigned division by 2^k must be a logical RIGHT shift
 * (new_rd_Shr); new_rd_Shl multiplies.  Same defect as in
 * arch_dep_replace_div_with_shifts. */
528 *div = new_rd_Shl(dbg, current_ir_graph, block, left, k_node, mode);
/* NOTE(review): '1 << k' overflows (UB) for k == bits-1 on int-sized
 * modes. */
530 k_node = new_r_Const(current_ir_graph, block, mode, new_tarval_from_long((1 << k) - 1, mode));
531 *mod = new_rd_And(dbg, current_ir_graph, block, left, k_node, mode);
/* Statistics hook: record that a DivMod was strength-reduced. */
537 stat_arch_dep_replace_DivMod_with_shifts(irn);
/** Default parameter set (see the fields read in
 *  arch_dep_replace_mul_with_shifts: also_use_subs, maximum_shifts,
 *  highest_shift_amount). */
541 static const arch_dep_params_t default_params = {
542 1, /* also_use_subs: subs may be used to shorten shift sequences */
543 4, /* maximum_shifts: most shifts allowed per replaced Mul */
544 31 /* highest_shift_amount: largest single shift distance allowed */
/** Default factory: returns a pointer to the static default parameter
 *  set above (caller must not free it). */
547 const arch_dep_params_t *arch_dep_default_factory(void) {
548 return &default_params;