From 081ef4f7ca73620500e5494f882ee6095c8bb983 Mon Sep 17 00:00:00 2001 From: Michael Beck Date: Sat, 19 Apr 2008 23:58:08 +0000 Subject: [PATCH] - add optimisation for size [r19340] --- ir/be/ia32/ia32_architecture.c | 39 ++++++++++++++++++++++++++------- ir/be/ia32/ia32_architecture.h | 2 ++ ir/be/ia32/ia32_emitter.c | 40 ++++++++++++++++++---------------- 3 files changed, 54 insertions(+), 27 deletions(-) diff --git a/ir/be/ia32/ia32_architecture.c b/ir/be/ia32/ia32_architecture.c index f5da3c24c..08fcf942d 100644 --- a/ir/be/ia32/ia32_architecture.c +++ b/ir/be/ia32/ia32_architecture.c @@ -113,6 +113,7 @@ enum cpu_support { cpu_c3_2 = arch_ppro | arch_feature_sse1, /* really no 3DNow! */ }; +static int opt_size = 0; static cpu_support arch = cpu_generic; static cpu_support opt_arch = cpu_core2; static int use_sse2 = 0; @@ -191,6 +192,7 @@ static lc_opt_enum_int_var_t fp_unit_var = { }; static const lc_opt_table_entry_t ia32_architecture_options[] = { + LC_OPT_ENT_BOOL("size", "optimize for size", &opt_size), LC_OPT_ENT_ENUM_INT("arch", "select the instruction architecture", &arch_var), LC_OPT_ENT_ENUM_INT("opt", "optimize for instruction architecture", @@ -215,6 +217,18 @@ typedef struct insn_const { unsigned label_alignment_max_skip; /**< maximum skip for alignment of loops labels */ } insn_const; +/* costs for optimizing for size */ +static const insn_const size_cost = { + 2, /* cost of an add instruction */ + 3, /* cost of a lea instruction */ + 3, /* cost of a constant shift instruction */ + 3, /* starting cost of a multiply instruction */ + 0, /* cost of multiply for every set bit */ + 0, /* logarithm for alignment of function labels */ + 0, /* logarithm for alignment of loops labels */ + 0, /* maximum skip for alignment of loops labels */ +}; + /* costs for the i386 */ static const insn_const i386_cost = { 1, /* cost of an add instruction */ @@ -375,6 +389,10 @@ static const insn_const *arch_costs = &generic32_cost; static void set_arch_costs(void) { + if 
(opt_size) { + arch_costs = &size_cost; + return; + } switch (opt_arch & arch_mask) { case arch_i386: arch_costs = &i386_cost; @@ -459,12 +477,13 @@ void ia32_setup_cg_config(void) set_arch_costs(); + ia32_cg_config.optimize_size = opt_size != 0; /* on newer intel cpus mov, pop is often faster then leave although it has a * longer opcode */ ia32_cg_config.use_leave = FLAGS(opt_arch, arch_i386 | arch_all_amd | arch_core2); /* P4s don't like inc/decs because they only partially write the flags register which produces false dependencies */ - ia32_cg_config.use_incdec = !FLAGS(opt_arch, arch_netburst | arch_nocona | arch_geode); + ia32_cg_config.use_incdec = !FLAGS(opt_arch, arch_netburst | arch_nocona | arch_geode) || opt_size; ia32_cg_config.use_sse2 = use_sse2; ia32_cg_config.use_ffreep = FLAGS(opt_arch, arch_athlon_plus); ia32_cg_config.use_ftst = !FLAGS(arch, arch_feature_p6_insn); @@ -475,17 +494,21 @@ void ia32_setup_cg_config(void) ia32_cg_config.use_modeD_moves = FLAGS(opt_arch, arch_athlon_plus | arch_geode | arch_ppro | arch_netburst | arch_nocona | arch_core2 | arch_generic32); ia32_cg_config.use_add_esp_4 = FLAGS(opt_arch, arch_geode | arch_athlon_plus | - arch_netburst | arch_nocona | arch_core2 | arch_generic32); + arch_netburst | arch_nocona | arch_core2 | arch_generic32) && + !opt_size; ia32_cg_config.use_add_esp_8 = FLAGS(opt_arch, arch_geode | arch_athlon_plus | arch_i386 | arch_i486 | arch_ppro | arch_netburst | - arch_nocona | arch_core2 | arch_generic32); + arch_nocona | arch_core2 | arch_generic32) && + !opt_size; ia32_cg_config.use_sub_esp_4 = FLAGS(opt_arch, arch_athlon_plus | arch_ppro | - arch_netburst | arch_nocona | arch_core2 | arch_generic32); + arch_netburst | arch_nocona | arch_core2 | arch_generic32) && + !opt_size; ia32_cg_config.use_sub_esp_8 = FLAGS(opt_arch, arch_athlon_plus | arch_i386 | arch_i486 | - arch_ppro | arch_netburst | arch_nocona | arch_core2 | arch_generic32); - ia32_cg_config.use_imul_mem_imm32 = !FLAGS(opt_arch, 
arch_k8 | arch_k10); - ia32_cg_config.use_mov_0 = FLAGS(opt_arch, arch_k6); - ia32_cg_config.use_pad_return = FLAGS(opt_arch, arch_athlon_plus | cpu_core2 | arch_generic32); + arch_ppro | arch_netburst | arch_nocona | arch_core2 | arch_generic32) && + !opt_size; + ia32_cg_config.use_imul_mem_imm32 = !FLAGS(opt_arch, arch_k8 | arch_k10) || opt_size; + ia32_cg_config.use_mov_0 = FLAGS(opt_arch, arch_k6) && !opt_size; + ia32_cg_config.use_pad_return = FLAGS(opt_arch, arch_athlon_plus | cpu_core2 | arch_generic32) && !opt_size; ia32_cg_config.optimize_cc = opt_cc; ia32_cg_config.use_unsafe_floatconv = opt_unsafe_floatconv; diff --git a/ir/be/ia32/ia32_architecture.h b/ir/be/ia32/ia32_architecture.h index 4c448d5ef..4401f60a0 100644 --- a/ir/be/ia32/ia32_architecture.h +++ b/ir/be/ia32/ia32_architecture.h @@ -27,6 +27,8 @@ #define FIRM_BE_IA32_ARCHITECTURE_H typedef struct { + /** optimize for size */ + unsigned optimize_size:1; /** use leave in function epilogue */ unsigned use_leave:1; /** use inc, dec instead of add ,1 and add, -1 */ diff --git a/ir/be/ia32/ia32_emitter.c b/ir/be/ia32/ia32_emitter.c index e1fd9e92a..0ba657a18 100644 --- a/ir/be/ia32/ia32_emitter.c +++ b/ir/be/ia32/ia32_emitter.c @@ -2025,31 +2025,33 @@ static void ia32_emit_block_header(ir_node *block, ir_node *prev_block) } } - /* align the current block if: - * a) if should be aligned due to its execution frequency - * b) there is no fall-through here - */ - if (should_align_block(block, prev_block)) { - ia32_emit_align_label(); - } else { - /* if the predecessor block has no fall-through, - we can always align the label. 
*/ - int i; - ir_node *check_node = NULL; + if (ia32_cg_config.label_alignment > 0) { + /* align the current block if: + * a) it should be aligned due to its execution frequency + * b) there is no fall-through here + */ + if (should_align_block(block, prev_block)) { + ia32_emit_align_label(); + } else { + /* if the predecessor block has no fall-through, + we can always align the label. */ + int i; + ir_node *check_node = NULL; - for (i = n_cfgpreds - 1; i >= 0; --i) { - ir_node *cfg_pred = get_Block_cfgpred(block, i); + for (i = n_cfgpreds - 1; i >= 0; --i) { + ir_node *cfg_pred = get_Block_cfgpred(block, i); - if (get_nodes_block(skip_Proj(cfg_pred)) == prev_block) { - check_node = cfg_pred; - break; + if (get_nodes_block(skip_Proj(cfg_pred)) == prev_block) { + check_node = cfg_pred; + break; + } } + if (check_node == NULL || !is_fallthrough(check_node)) + ia32_emit_align_label(); } - if (check_node == NULL || !is_fallthrough(check_node)) - ia32_emit_align_label(); } - if(need_label) { + if (need_label) { ia32_emit_block_name(block); be_emit_char(':'); -- 2.20.1