From 081ef4f7ca73620500e5494f882ee6095c8bb983 Mon Sep 17 00:00:00 2001
From: Michael Beck <beck@ipd.info.uni-karlsruhe.de>
Date: Sat, 19 Apr 2008 23:58:08 +0000
Subject: [PATCH] - add optimisation for size

[r19340]
---
 ir/be/ia32/ia32_architecture.c | 39 ++++++++++++++++++++++++++-------
 ir/be/ia32/ia32_architecture.h |  2 ++
 ir/be/ia32/ia32_emitter.c      | 40 ++++++++++++++++++----------------
 3 files changed, 54 insertions(+), 27 deletions(-)

diff --git a/ir/be/ia32/ia32_architecture.c b/ir/be/ia32/ia32_architecture.c
index f5da3c24c..08fcf942d 100644
--- a/ir/be/ia32/ia32_architecture.c
+++ b/ir/be/ia32/ia32_architecture.c
@@ -113,6 +113,7 @@ enum cpu_support {
 	cpu_c3_2        = arch_ppro | arch_feature_sse1,  /* really no 3DNow! */
 };
 
+static int         opt_size             = 0;
 static cpu_support arch                 = cpu_generic;
 static cpu_support opt_arch             = cpu_core2;
 static int         use_sse2             = 0;
@@ -191,6 +192,7 @@ static lc_opt_enum_int_var_t fp_unit_var = {
 };
 
 static const lc_opt_table_entry_t ia32_architecture_options[] = {
+	LC_OPT_ENT_BOOL("size",            "optimize for size", &opt_size),
 	LC_OPT_ENT_ENUM_INT("arch",        "select the instruction architecture",
 	                    &arch_var),
 	LC_OPT_ENT_ENUM_INT("opt",         "optimize for instruction architecture",
@@ -215,6 +217,18 @@ typedef struct insn_const {
 	unsigned label_alignment_max_skip; /**< maximum skip for alignment of loops labels */
 } insn_const;
 
+/* cost table used by set_arch_costs() when opt_size is set; NOTE(review): values presumably model code size rather than latency - confirm */
+static const insn_const size_cost = {
+	2,   /* cost of an add instruction */
+	3,   /* cost of a lea instruction */
+	3,   /* cost of a shift-by-constant instruction */
+	3,   /* base cost of a multiply instruction */
+	0,   /* additional multiply cost for every set bit */
+	0,   /* log2 of the function label alignment (0: presumably no padding - confirm) */
+	0,   /* log2 of the loop label alignment (0: presumably no padding - confirm) */
+	0,   /* maximum number of bytes to skip when aligning loop labels */
+};
+
 /* costs for the i386 */
 static const insn_const i386_cost = {
 	1,   /* cost of an add instruction */
@@ -375,6 +389,10 @@ static const insn_const *arch_costs = &generic32_cost;
 
 static void set_arch_costs(void)
 {
+	if (opt_size) {
+		arch_costs = &size_cost;
+		return;
+	}
 	switch (opt_arch & arch_mask) {
 	case arch_i386:
 		arch_costs = &i386_cost;
@@ -459,12 +477,13 @@ void ia32_setup_cg_config(void)
 
 	set_arch_costs();
 
+	ia32_cg_config.optimize_size        = opt_size != 0;
 	/* on newer intel cpus mov, pop is often faster then leave although it has a
 	 * longer opcode */
 	ia32_cg_config.use_leave            = FLAGS(opt_arch, arch_i386 | arch_all_amd | arch_core2);
 	/* P4s don't like inc/decs because they only partially write the flags
 	   register which produces false dependencies */
-	ia32_cg_config.use_incdec           = !FLAGS(opt_arch, arch_netburst | arch_nocona | arch_geode);
+	ia32_cg_config.use_incdec           = !FLAGS(opt_arch, arch_netburst | arch_nocona | arch_geode) || opt_size;
 	ia32_cg_config.use_sse2             = use_sse2;
 	ia32_cg_config.use_ffreep           = FLAGS(opt_arch, arch_athlon_plus);
 	ia32_cg_config.use_ftst             = !FLAGS(arch, arch_feature_p6_insn);
@@ -475,17 +494,21 @@ void ia32_setup_cg_config(void)
 	ia32_cg_config.use_modeD_moves      = FLAGS(opt_arch, arch_athlon_plus | arch_geode | arch_ppro |
 	                                            arch_netburst | arch_nocona | arch_core2 | arch_generic32);
 	ia32_cg_config.use_add_esp_4        = FLAGS(opt_arch, arch_geode | arch_athlon_plus |
-	                                            arch_netburst | arch_nocona | arch_core2 | arch_generic32);
+	                                            arch_netburst | arch_nocona | arch_core2 | arch_generic32) &&
+	                                      !opt_size;
 	ia32_cg_config.use_add_esp_8        = FLAGS(opt_arch, arch_geode | arch_athlon_plus |
 	                                            arch_i386 | arch_i486 | arch_ppro | arch_netburst |
-	                                            arch_nocona | arch_core2 | arch_generic32);
+	                                            arch_nocona | arch_core2 | arch_generic32) &&
+	                                      !opt_size;
 	ia32_cg_config.use_sub_esp_4        = FLAGS(opt_arch, arch_athlon_plus | arch_ppro |
-	                                            arch_netburst | arch_nocona | arch_core2 | arch_generic32);
+	                                            arch_netburst | arch_nocona | arch_core2 | arch_generic32) &&
+	                                      !opt_size;
 	ia32_cg_config.use_sub_esp_8        = FLAGS(opt_arch, arch_athlon_plus | arch_i386 | arch_i486 |
-	                                            arch_ppro | arch_netburst | arch_nocona | arch_core2 | arch_generic32);
-	ia32_cg_config.use_imul_mem_imm32   = !FLAGS(opt_arch, arch_k8 | arch_k10);
-	ia32_cg_config.use_mov_0            = FLAGS(opt_arch, arch_k6);
-	ia32_cg_config.use_pad_return       = FLAGS(opt_arch, arch_athlon_plus | cpu_core2 | arch_generic32);
+	                                            arch_ppro | arch_netburst | arch_nocona | arch_core2 | arch_generic32) &&
+	                                      !opt_size;
+	ia32_cg_config.use_imul_mem_imm32   = !FLAGS(opt_arch, arch_k8 | arch_k10) || opt_size;
+	ia32_cg_config.use_mov_0            = FLAGS(opt_arch, arch_k6) && !opt_size;
+	ia32_cg_config.use_pad_return       = FLAGS(opt_arch, arch_athlon_plus | cpu_core2 | arch_generic32) && !opt_size;
 	ia32_cg_config.optimize_cc          = opt_cc;
 	ia32_cg_config.use_unsafe_floatconv = opt_unsafe_floatconv;
 
diff --git a/ir/be/ia32/ia32_architecture.h b/ir/be/ia32/ia32_architecture.h
index 4c448d5ef..4401f60a0 100644
--- a/ir/be/ia32/ia32_architecture.h
+++ b/ir/be/ia32/ia32_architecture.h
@@ -27,6 +27,8 @@
 #define FIRM_BE_IA32_ARCHITECTURE_H
 
 typedef struct {
+	/** optimize for size */
+	unsigned optimize_size:1;
 	/** use leave in function epilogue */
 	unsigned use_leave:1;
 	/** use inc, dec instead of add ,1 and add, -1 */
diff --git a/ir/be/ia32/ia32_emitter.c b/ir/be/ia32/ia32_emitter.c
index e1fd9e92a..0ba657a18 100644
--- a/ir/be/ia32/ia32_emitter.c
+++ b/ir/be/ia32/ia32_emitter.c
@@ -2025,31 +2025,33 @@ static void ia32_emit_block_header(ir_node *block, ir_node *prev_block)
 		}
 	}
 
-	/* align the current block if:
-	 * a) if should be aligned due to its execution frequency
-	 * b) there is no fall-through here
-	 */
-	if (should_align_block(block, prev_block)) {
-		ia32_emit_align_label();
-	} else {
-		/* if the predecessor block has no fall-through,
-		   we can always align the label. */
-		int i;
-		ir_node *check_node = NULL;
+	if (ia32_cg_config.label_alignment > 0) {
+		/* align the current block if:
+		 * a) it should be aligned due to its execution frequency
+		 * b) there is no fall-through here
+		 */
+		if (should_align_block(block, prev_block)) {
+			ia32_emit_align_label();
+		} else {
+			/* if the predecessor block has no fall-through,
+			   we can always align the label. */
+			int i;
+			ir_node *check_node = NULL;
 
-		for (i = n_cfgpreds - 1; i >= 0; --i) {
-			ir_node *cfg_pred = get_Block_cfgpred(block, i);
+			for (i = n_cfgpreds - 1; i >= 0; --i) {
+				ir_node *cfg_pred = get_Block_cfgpred(block, i);
 
-			if (get_nodes_block(skip_Proj(cfg_pred)) == prev_block) {
-				check_node = cfg_pred;
-				break;
+				if (get_nodes_block(skip_Proj(cfg_pred)) == prev_block) {
+					check_node = cfg_pred;
+					break;
+				}
 			}
+			if (check_node == NULL || !is_fallthrough(check_node))
+				ia32_emit_align_label();
 		}
-		if (check_node == NULL || !is_fallthrough(check_node))
-			ia32_emit_align_label();
 	}
 
-	if(need_label) {
+	if (need_label) {
 		ia32_emit_block_name(block);
 		be_emit_char(':');
 
-- 
2.20.1