From: Matthias Braun <matze@braunis.de>
Date: Mon, 23 Apr 2007 09:11:12 +0000 (+0000)
Subject: improve/fix wrong alignment in block emitting
X-Git-Url: http://nsz.repo.hu/git/?a=commitdiff_plain;h=391c4e7cb986558842e03b8183c140b81e0c6b19;p=libfirm

improve/fix wrong alignment in block emitting

[r13441]
---

diff --git a/ir/be/ia32/ia32_emitter.c b/ir/be/ia32/ia32_emitter.c
index 7d8dc2c92..4e8354f28 100644
--- a/ir/be/ia32/ia32_emitter.c
+++ b/ir/be/ia32/ia32_emitter.c
@@ -654,14 +654,21 @@ ir_node *get_cfop_target_block(const ir_node *irn) {
 	return get_irn_link(irn);
 }
 
+static
+void ia32_emit_block_name(ia32_emit_env_t *env, const ir_node *block)
+{
+	be_emit_cstring(env, BLOCK_PREFIX);
+	be_emit_irprintf(env->emit, "%d", get_irn_node_nr(block));
+}
+
 /**
  * Returns the target label for a control flow node.
  */
+static
 void ia32_emit_cfop_target(ia32_emit_env_t * env, const ir_node *node) {
 	ir_node *block = get_cfop_target_block(node);
 
-	be_emit_cstring(env, BLOCK_PREFIX);
-	be_emit_irprintf(env->emit, "%d", get_irn_node_nr(block));
+	ia32_emit_block_name(env, block);
 }
 
 /** Return the next block in Block schedule */
@@ -746,7 +753,7 @@ void finish_CondJmp(ia32_emit_env_t *env, const ir_node *node, ir_mode *mode,
 		ia32_emit_cfop_target(env, proj_false);
 		be_emit_finish_line_gas(env, proj_false);
 	} else {
-		be_emit_cstring(env, "\t/* fallthrough to");
+		be_emit_cstring(env, "\t/* fallthrough to ");
 		ia32_emit_cfop_target(env, proj_false);
 		be_emit_cstring(env, " */");
 		be_emit_finish_line_gas(env, proj_false);
@@ -1913,12 +1920,21 @@ void ia32_emit_align_label(ia32_emit_env_t *env, cpu_support cpu) {
 	ia32_emit_alignment(env, align, maximum_skip);
 }
 
+/**
+ * Test whether a block should be aligned.
+ * For cpus in the P4/Athlon class it is useful to align jump labels to
+ * 16 bytes. However we should only do that if the alignment nops before the
+ * label aren't executed more often than we have jumps to the label.
+ */
 static
-int is_first_loop_block(ia32_emit_env_t *env, ir_node *block, ir_node *prev_block) {
-	ir_exec_freq *exec_freq = env->cg->birg->exec_freq;
-	double block_freq, prev_freq;
+int should_align_block(ia32_emit_env_t *env, ir_node *block, ir_node *prev) {
 	static const double DELTA = .0001;
-	cpu_support cpu = env->isa->opt_arch;
+	ir_exec_freq *exec_freq = env->cg->birg->exec_freq;
+	double        block_freq;
+	double        prev_freq = 0;  /**< execfreq of the fallthrough block */
+	double        jmp_freq  = 0;  /**< execfreq of all non-fallthrough blocks */
+	cpu_support   cpu       = env->isa->opt_arch;
+	int           i, n_cfgpreds;
 
 	if(exec_freq == NULL)
 		return 0;
@@ -1926,89 +1942,74 @@ int is_first_loop_block(ia32_emit_env_t *env, ir_node *block, ir_node *prev_bloc
 		return 0;
 
 	block_freq = get_block_execfreq(exec_freq, block);
-	prev_freq = get_block_execfreq(exec_freq, prev_block);
-
-	if(block_freq < DELTA || prev_freq < DELTA)
+	if(block_freq < DELTA)
 		return 0;
 
-	block_freq /= prev_freq;
+	n_cfgpreds = get_Block_n_cfgpreds(block);
+	for(i = 0; i < n_cfgpreds; ++i) {
+		ir_node *pred      = get_Block_cfgpred_block(block, i);
+		double   pred_freq = get_block_execfreq(exec_freq, pred);
+
+		if(pred == prev) {
+			assert(prev_freq == 0);
+			prev_freq += pred_freq;
+		} else {
+			jmp_freq  += pred_freq;
+		}
+	}
+
+	if(prev_freq < DELTA && !(jmp_freq < DELTA))
+		return 1;
+
+	jmp_freq /= prev_freq;
 
 	switch (cpu) {
 		case arch_athlon:
 		case arch_athlon_64:
 		case arch_k6:
-			return block_freq > 3;
+			return jmp_freq > 3;
 		default:
-			break;
+			return jmp_freq > 2;
 	}
-
-	return block_freq > 2;
 }
 
-/**
- * Walks over the nodes in a block connected by scheduling edges
- * and emits code for each node.
- */
 static
-void ia32_gen_block(ia32_emit_env_t *env, ir_node *block, ir_node *last_block) {
-	ir_graph      *irg         = get_irn_irg(block);
-	ir_node       *start_block = get_irg_start_block(irg);
-	int           need_label   = 1;
-	const ir_node *node;
-	int           i;
-
-	assert(is_Block(block));
-
-	if (block == start_block)
+void ia32_emit_block_header(ia32_emit_env_t *env, ir_node *block, ir_node *prev)
+{
+	int           n_cfgpreds;
+	int           need_label;
+	int           i, arity;
+	ir_exec_freq  *exec_freq = env->cg->birg->exec_freq;
+
+	need_label = 1;
+	n_cfgpreds = get_Block_n_cfgpreds(block);
+	if (n_cfgpreds == 0) {
 		need_label = 0;
-
-	if (need_label && get_irn_arity(block) == 1) {
-		ir_node *pred_block = get_Block_cfgpred_block(block, 0);
-
-		if (pred_block == last_block && get_irn_n_edges_kind(pred_block, EDGE_KIND_BLOCK) <= 2)
+	} else if (n_cfgpreds == 1) {
+		ir_node *pred       = get_Block_cfgpred(block, 0);
+		ir_node *pred_block = get_nodes_block(pred);
+
+		/* we don't need labels for fallthrough blocks, however switch-jmps
+		 * are no fallthroughs */
+		if(pred_block == prev &&
+				!(is_Proj(pred) && is_ia32_SwitchJmp(get_Proj_pred(pred)))) {
 			need_label = 0;
-	}
-
-	/* special case: if one of our cfg preds is a switch-jmp we need a label, */
-	/*               otherwise there might be jump table entries jumping to   */
-	/*               non-existent (omitted) labels                            */
-	for (i = get_Block_n_cfgpreds(block) - 1; i >= 0; --i) {
-		ir_node *pred = get_Block_cfgpred(block, i);
-
-		if (is_Proj(pred)) {
-			assert(get_irn_mode(pred) == mode_X);
-			if (is_ia32_SwitchJmp(get_Proj_pred(pred))) {
-				need_label = 1;
-				break;
-			}
+		} else {
+			need_label = 1;
 		}
+	} else {
+		need_label = 1;
 	}
 
-	if (need_label) {
-		int i, arity;
-		int align = 1;
-		ir_exec_freq *exec_freq = env->cg->birg->exec_freq;
-
-		/* align the loop headers */
-		if (! is_first_loop_block(env, block, last_block)) {
-			/* align blocks where the previous block has no fallthrough */
-			arity = get_irn_arity(block);
-
-			for (i = 0; i < arity; ++i) {
-				ir_node *predblock = get_Block_cfgpred_block(block, i);
-
-				if (predblock == last_block) {
-					align = 0;
-					break;
-				}
-			}
-		}
+	if (should_align_block(env, block, prev)) {
+		assert(need_label);
+		ia32_emit_align_label(env, env->isa->opt_arch);
+	}
 
-		if (align)
-			ia32_emit_align_label(env, env->isa->opt_arch);
+	if(need_label) {
+		ia32_emit_block_name(env, block);
+		be_emit_char(env, ':');
 
-		be_emit_cstring(env, BLOCK_PREFIX);
-		be_emit_irprintf(env->emit, "%d:", get_irn_node_nr(block));
 		be_emit_pad_comment(env);
 		be_emit_cstring(env, "   /* preds:");
 
@@ -2020,11 +2021,28 @@ void ia32_gen_block(ia32_emit_env_t *env, ir_node *block, ir_node *last_block) {
 		}
 
 		if (exec_freq != NULL) {
-			be_emit_irprintf(env->emit, " freq: %f", get_block_execfreq(exec_freq, block));
+			be_emit_irprintf(env->emit, " freq: %f",
+			                 get_block_execfreq(exec_freq, block));
 		}
 		be_emit_cstring(env, " */\n");
-		be_emit_write_line(env);
+	} else {
+		be_emit_cstring(env, "\t/* ");
+		ia32_emit_block_name(env, block);
+		be_emit_cstring(env, ": */\n");
 	}
+	be_emit_write_line(env);
+}
+
+/**
+ * Walks over the nodes in a block connected by scheduling edges
+ * and emits code for each node.
+ */
+static
+void ia32_gen_block(ia32_emit_env_t *env, ir_node *block, ir_node *last_block)
+{
+	const ir_node *node;
+
+	ia32_emit_block_header(env, block, last_block);
 
 	/* emit the contents of the block */
 	ia32_emit_dbg(env, block);