From: Manuel Mohr Date: Wed, 31 Aug 2011 17:34:55 +0000 (+0200) Subject: Improved CopyB lowering, made it part of target lowering. X-Git-Url: http://nsz.repo.hu/git/?a=commitdiff_plain;h=bb3144f01520732c3e22858e820ed9f7ca8c912f;p=libfirm Improved CopyB lowering, made it part of target lowering. Backends can configure CopyB lowering, so that it's possible to keep CopyB nodes in a certain size range for special backend-specific optimizations. Furthermore, large CopyBs are turned into memcpy calls. --- diff --git a/include/libfirm/lowering.h b/include/libfirm/lowering.h index 70e38938c..56b046ad9 100644 --- a/include/libfirm/lowering.h +++ b/include/libfirm/lowering.h @@ -33,10 +33,25 @@ #include "begin.h" /** - * Lower CopyB nodes of size smaller that max_size into Loads/Stores - */ -FIRM_API void lower_CopyB(ir_graph *irg, unsigned max_size, - unsigned native_mode_bytes); + * Lower small CopyB nodes to Load/Store nodes, preserve medium-sized CopyB + * nodes and replace large CopyBs by a call to memcpy, depending on the given + * parameters. + * + * Small CopyB nodes (size <= max_small_size) are turned into a series of + * loads and stores. + * Medium-sized CopyB nodes (max_small_size < size < min_large_size) are + * left untouched. + * Large CopyB nodes (size >= min_large_size) are turned into a memcpy call. + * + * @param irg The graph to be lowered. + * @param max_small_size The maximum number of bytes for a CopyB node so + * that it is still considered 'small'. + * @param min_large_size The minimum number of bytes for a CopyB node so + * that it is regarded as 'large'. + * @param native_mode_bytes Specify load/store size, typically register width. + */ +FIRM_API void lower_CopyB(ir_graph *irg, unsigned max_small_size, + unsigned min_large_size, unsigned native_mode_bytes); /** * Lowers all Switches (Cond nodes with non-boolean mode) depending on spare_size. diff --git a/ir/be/amd64/bearch_amd64.c b/ir/be/amd64/bearch_amd64.c index 65257c4c5..5f50aeb5d 100644 --- a/ir/be/amd64/bearch_amd64.c +++ b/ir/be/amd64/bearch_amd64.c @@ -470,8 +470,19 @@ static int amd64_get_reg_class_alignment(const arch_register_class_t *cls) static void amd64_lower_for_target(void) { + size_t i, n_irgs = get_irp_n_irgs(); + /* lower compound param handling */ lower_calls_with_compounds(LF_RETURN_HIDDEN); + + for (i = 0; i < n_irgs; ++i) { + ir_graph *irg = get_irp_irg(i); + /* Turn all small CopyBs into loads/stores, and turn all bigger + * CopyBs into memcpy calls, because we cannot handle CopyB nodes + * during code generation yet. + * TODO: Adapt this once custom CopyB handling is implemented. */ + lower_CopyB(irg, 64, 65, 4); + } } static int amd64_is_mux_allowed(ir_node *sel, ir_node *mux_false, diff --git a/ir/be/arm/bearch_arm.c b/ir/be/arm/bearch_arm.c index 6c32ca000..0f2f06823 100644 --- a/ir/be/arm/bearch_arm.c +++ b/ir/be/arm/bearch_arm.c @@ -542,6 +542,14 @@ static void arm_lower_for_target(void) ir_graph *irg = get_irp_irg(i); lower_switch(irg, 4, 256, true); } + + for (i = 0; i < n_irgs; ++i) { + ir_graph *irg = get_irp_irg(i); + /* Turn all small CopyBs into loads/stores and all bigger CopyBs into + * memcpy calls. + * TODO: These constants need arm-specific tuning. 
*/ + lower_CopyB(irg, 31, 32, 4); + } } /** diff --git a/ir/be/ia32/bearch_ia32.c b/ir/be/ia32/bearch_ia32.c index 030b6af31..efcf897be 100644 --- a/ir/be/ia32/bearch_ia32.c +++ b/ir/be/ia32/bearch_ia32.c @@ -2049,6 +2049,14 @@ static void ia32_lower_for_target(void) /* break up switches with wide ranges */ lower_switch(irg, 4, 256, false); } + + for (i = 0; i < n_irgs; ++i) { + ir_graph *irg = get_irp_irg(i); + /* Turn all small CopyBs into loads/stores, keep medium-sized CopyBs, + * so we can generate rep movs later, and turn all big CopyBs into + * memcpy calls. */ + lower_CopyB(irg, 64, 8193, 4); + } } /** diff --git a/ir/be/sparc/bearch_sparc.c b/ir/be/sparc/bearch_sparc.c index 2f1920cf8..6feb3f78c 100644 --- a/ir/be/sparc/bearch_sparc.c +++ b/ir/be/sparc/bearch_sparc.c @@ -420,6 +420,7 @@ static void sparc_lower_for_target(void) sparc_create_set, 0, }; + lower_calls_with_compounds(LF_RETURN_HIDDEN); if (sparc_isa_template.fpu_arch == SPARC_FPU_ARCH_SOFTFLOAT) @@ -434,6 +435,13 @@ static void sparc_lower_for_target(void) ir_lower_mode_b(irg, &lower_mode_b_config); lower_switch(irg, 4, 256, false); } + + for (i = 0; i < n_irgs; ++i) { + ir_graph *irg = get_irp_irg(i); + /* Turn all small CopyBs into loads/stores and all bigger CopyBs into + * memcpy calls. */ + lower_CopyB(irg, 31, 32, 4); + } } static int sparc_is_mux_allowed(ir_node *sel, ir_node *mux_false, diff --git a/ir/lower/lower_copyb.c b/ir/lower/lower_copyb.c index d18afa73a..f35702071 100644 --- a/ir/lower/lower_copyb.c +++ b/ir/lower/lower_copyb.c @@ -19,8 +19,8 @@ /** * @file - * @brief Lower small CopyB nodes into a series of Load/store - * @author Michael Beck, Matthias Braun + * @brief Lower small CopyB nodes into a series of Load/Store nodes + * @author Michael Beck, Matthias Braun, Manuel Mohr * @version $Id$ */ #include "config.h" @@ -42,10 +42,53 @@ struct entry { ir_node *copyb; }; +/** + * Every CopyB is assigned a size category as follows: + * - 'small' iff size <= max_small_size, + * - 'medium' iff max_small_size < size < min_large_size, + * - 'large' iff size >= min_large_size. + * + * The idea is that each backend can apply different optimizations in each + * of the three categories. + * + * For small CopyBs, the x86 backend could, e.g., emit a single SSE + * instruction to copy 16 bytes. Other backends might just go with a series + * of Load/Stores. Therefore, x86 would like to keep the small CopyB nodes + * around whereas other backends would not. + * For medium-sized CopyBs, the x86 backend might generate a rep-prefixed mov + * instruction. Hence, it also wants to keep the CopyBs in these cases. Other + * backends might handle this differently. + * For large CopyBs, a call to memcpy is worth the call overhead, so large + * CopyBs should always be lowered to memcpy calls. + * + * The lowerer performs the following actions if the CopyB is + * - 'small': Replace it with a series of Loads/Stores + * - 'medium': Nothing. + * - 'large': Replace it with a call to memcpy. + * + * max_small_size and min_large_size allow for a flexible configuration. + * For example, one backend could specify max_small_size == 0 and + * min_large_size == 8192 to keep all CopyB nodes smaller than 8192 and get + * memcpy Calls for all others. Here, the set of small CopyBs is empty. + * Another backend could specify max_small_size == 63 and min_large_size == 64 + * to lower all small CopyBs to Loads/Stores and all big CopyBs to memcpy. 
+ * Hence, the set of medium-sized CopyBs is empty and this backend never + * sees a CopyB node at all. + * If memcpy is not available, min_large_size can be set to UINT_MAX to prevent + * the creation of calls to memcpy. Note that CopyBs whose size is UINT_MAX + * will still be lowered to memcpy calls because we check if the size is greater + * *or equal* to min_large_size. However, this should never occur in practice. + */ + +static unsigned max_small_size; /**< The maximum size of a CopyB node + so that it is regarded as 'small'. */ +static unsigned min_large_size; /**< The minimum size of a CopyB node + so that it is regarded as 'large'. */ + typedef struct walk_env { - unsigned max_size; - struct obstack obst; /**< the obstack where data is allocated on */ - struct list_head list; /**< the list of copyb nodes */ + struct obstack obst; /**< the obstack where data is allocated + on. */ + struct list_head list; /**< the list of copyb nodes. */ } walk_env_t; static ir_mode *get_ir_mode(unsigned bytes) @@ -62,20 +105,20 @@ static ir_mode *get_ir_mode(unsigned bytes) } /** - * lower a CopyB node. + * Turn a small CopyB node into a series of Load/Store nodes. */ -static void lower_copyb_nodes(ir_node *irn, unsigned mode_bytes) +static void lower_small_copyb_node(ir_node *irn, unsigned mode_bytes) { - ir_graph *irg = get_irn_irg(irn); - unsigned size; - unsigned offset; - ir_mode *mode; - ir_mode *addr_mode; - ir_node *mem; - ir_node *addr_src; - ir_node *addr_dst; - ir_node *block; - ir_type *tp; + ir_graph *irg = get_irn_irg(irn); + unsigned size; + unsigned offset; + ir_mode *mode; + ir_mode *addr_mode; + ir_node *mem; + ir_node *addr_src; + ir_node *addr_dst; + ir_node *block; + ir_type *tp; addr_src = get_CopyB_src(irn); addr_dst = get_CopyB_dst(irn); @@ -124,8 +167,76 @@ static void lower_copyb_nodes(ir_node *irn, unsigned mode_bytes) set_Tuple_pred(irn, pn_CopyB_X_except, new_r_Bad(irg, mode_X)); } +static ir_type *get_memcpy_methodtype() +{ + ir_type *tp = new_type_method(3, 1); + + set_method_param_type(tp, 0, get_type_for_mode(mode_P)); + set_method_param_type(tp, 1, get_type_for_mode(mode_P)); + set_method_param_type(tp, 2, get_type_for_mode(mode_Lu)); + set_method_res_type (tp, 0, get_type_for_mode(mode_P)); + + return tp; +} + +static ir_node *get_memcpy_symconst(ir_graph *irg) +{ + ident *id = new_id_from_str("memcpy"); + ir_type *mt = get_memcpy_methodtype(); + ir_entity *ent = new_entity(get_glob_type(), id, mt); + symconst_symbol sym; + + set_entity_ld_ident(ent, get_entity_ident(ent)); + sym.entity_p = ent; + + return new_r_SymConst(irg, mode_P_code, sym, symconst_addr_ent); +} + +/** + * Turn a large CopyB node into a memcpy call. 
+ */ +static void lower_large_copyb_node(ir_node *irn) +{ + ir_graph *irg = get_irn_irg(irn); + ir_node *block = get_nodes_block(irn); + dbg_info *dbgi = get_irn_dbg_info(irn); + ir_node *mem = get_CopyB_mem(irn); + ir_node *addr_src = get_CopyB_src(irn); + ir_node *addr_dst = get_CopyB_dst(irn); + ir_type *copyb_tp = get_CopyB_type(irn); + unsigned size = get_type_size_bytes(copyb_tp); + + ir_node *symconst = get_memcpy_symconst(irg); + ir_type *call_tp = get_memcpy_methodtype(); + ir_node *in[3]; + ir_node *call; + ir_node *call_mem; + + in[0] = addr_dst; + in[1] = addr_src; + in[2] = new_r_Const_long(irg, mode_Lu, size); + call = new_rd_Call(dbgi, block, mem, symconst, 3, in, call_tp); + call_mem = new_r_Proj(call, mode_M, pn_Call_M); + + turn_into_tuple(irn, 1); + set_irn_n(irn, pn_CopyB_M, call_mem); +} + +static void lower_copyb_node(ir_node *irn, unsigned native_mode_bytes) +{ + ir_type *tp = get_CopyB_type(irn); + unsigned size = get_type_size_bytes(tp); + + if (size <= max_small_size) + lower_small_copyb_node(irn, native_mode_bytes); + else if (size >= min_large_size) + lower_large_copyb_node(irn); + else + assert(!"CopyB of invalid size handed to lower_copyb_node"); +} + /** - * Post-Walker: find small CopyB nodes. + * Post-Walker: find CopyB nodes. */ static void find_copyb_nodes(ir_node *irn, void *ctx) { @@ -133,6 +244,7 @@ static void find_copyb_nodes(ir_node *irn, void *ctx) ir_type *tp; unsigned size; entry_t *entry; + bool medium_sized; if (is_Proj(irn)) { ir_node *pred = get_Proj_pred(irn); @@ -152,11 +264,12 @@ static void find_copyb_nodes(ir_node *irn, void *ctx) if (get_type_state(tp) != layout_fixed) return; - size = get_type_size_bytes(tp); - if (size > env->max_size) - return; + size = get_type_size_bytes(tp); + medium_sized = max_small_size < size && size < min_large_size; + if (medium_sized) + return; /* Nothing to do for medium-sized CopyBs. */ - /* ok, link it in */ + /* Okay, either small or large CopyB, so link it in and lower it later. */ entry = OALLOC(&env->obst, entry_t); entry->copyb = irn; INIT_LIST_HEAD(&entry->list); @@ -164,18 +277,21 @@ static void find_copyb_nodes(ir_node *irn, void *ctx) list_add_tail(&entry->list, &env->list); } -void lower_CopyB(ir_graph *irg, unsigned max_size, unsigned native_mode_bytes) +void lower_CopyB(ir_graph *irg, unsigned max_small_sz, + unsigned min_large_sz, unsigned native_mode_bytes) { walk_env_t env; entry_t *entry; + assert(max_small_sz < min_large_sz && "CopyB size ranges must not overlap"); obstack_init(&env.obst); - env.max_size = max_size; + max_small_size = max_small_sz; + min_large_size = min_large_sz; INIT_LIST_HEAD(&env.list); irg_walk_graph(irg, NULL, find_copyb_nodes, &env); list_for_each_entry(entry_t, entry, &env.list, list) { - lower_copyb_nodes(entry->copyb, native_mode_bytes); + lower_copyb_node(entry->copyb, native_mode_bytes); } obstack_free(&env.obst, NULL);
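
The interface change above is driven entirely by the two size thresholds passed to lower_CopyB(). As a minimal sketch (not part of this patch), a backend's lower_for_target hook would wire it up the same way the amd64/arm/ia32/sparc hunks do; the function name and the constants 16, 1024 and 4 below are made-up placeholders that would need target-specific tuning:

/* Hypothetical example of using the new lower_CopyB() interface. */
static void example_lower_for_target(void)
{
	size_t i, n_irgs = get_irp_n_irgs();

	for (i = 0; i < n_irgs; ++i) {
		ir_graph *irg = get_irp_irg(i);
		/* CopyBs of size <= 16 bytes become Load/Store sequences,
		 * sizes 17..1023 stay as CopyB nodes for the backend to handle,
		 * and sizes >= 1024 become calls to memcpy. */
		lower_CopyB(irg, 16, 1024, 4);
	}
}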
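
The size classification documented in lower_copyb.c and applied by find_copyb_nodes()/lower_copyb_node() can be summarised by the following standalone helper; it is given for illustration only and does not exist in libfirm:

typedef enum { COPYB_SMALL, COPYB_MEDIUM, COPYB_LARGE } copyb_class_t;

static copyb_class_t classify_copyb(unsigned size, unsigned max_small_size,
                                    unsigned min_large_size)
{
	if (size <= max_small_size)
		return COPYB_SMALL;   /* lowered to a Load/Store sequence */
	if (size >= min_large_size)
		return COPYB_LARGE;   /* lowered to a call to memcpy */
	return COPYB_MEDIUM;      /* kept as a CopyB node for the backend */
}

With the ia32 settings above (max_small_size = 64, min_large_size = 8193), a 64-byte CopyB is small, a 100-byte CopyB is medium-sized (a rep movs candidate), and an 8193-byte CopyB is large and becomes a memcpy call.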
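
For reference, the method type built by get_memcpy_methodtype() models the standard C prototype

	void *memcpy(void *dest, const void *src, size_t n);

with mode_Lu used for the length argument, so lower_large_copyb_node() effectively rewrites a large CopyB(dst, src) of type size N into the equivalent of

	memcpy(dst, src, N);

and reroutes the CopyB's memory Proj to the memory result of the new Call.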