From: Manuel Mohr Date: Wed, 31 Aug 2011 17:34:55 +0000 (+0200) Subject: Improved CopyB lowering, made it part of target lowering. X-Git-Url: http://nsz.repo.hu/git/?a=commitdiff_plain;h=bb3144f01520732c3e22858e820ed9f7ca8c912f;p=libfirm Improved CopyB lowering, made it part of target lowering. Backends can configure CopyB lowering, so that it's possible to keep CopyB nodes in a certain size range for special backend-specific optimizations. Furthermore, large CopyBs are turned into memcpy calls. --- diff --git a/include/libfirm/lowering.h b/include/libfirm/lowering.h index 70e38938c..56b046ad9 100644 --- a/include/libfirm/lowering.h +++ b/include/libfirm/lowering.h @@ -33,10 +33,25 @@ #include "begin.h" /** - * Lower CopyB nodes of size smaller that max_size into Loads/Stores - */ -FIRM_API void lower_CopyB(ir_graph *irg, unsigned max_size, - unsigned native_mode_bytes); + * Lower small CopyB nodes to Load/Store nodes, preserve medium-sized CopyB + * nodes and replace large CopyBs by a call to memcpy, depending on the given + * parameters. + * + * Small CopyB nodes (size <= max_small_size) are turned into a series of + * loads and stores. + * Medium-sized CopyB nodes (max_small_size < size < min_large_size) are + * left untouched. + * Large CopyB nodes (size >= min_large_size) are turned into a memcpy call. + * + * @param irg The graph to be lowered. + * @param max_small_size The maximum number of bytes for a CopyB node so + * that it is still considered 'small'. + * @param min_large_size The minimum number of bytes for a CopyB node so + * that it is regarded as 'large'. + * @param native_mode_bytes Specify load/store size, typically register width. + */ +FIRM_API void lower_CopyB(ir_graph *irg, unsigned max_small_size, + unsigned min_large_size, unsigned native_mode_bytes); /** * Lowers all Switches (Cond nodes with non-boolean mode) depending on spare_size. diff --git a/ir/be/amd64/bearch_amd64.c b/ir/be/amd64/bearch_amd64.c index 65257c4c5..5f50aeb5d 100644 --- a/ir/be/amd64/bearch_amd64.c +++ b/ir/be/amd64/bearch_amd64.c @@ -470,8 +470,19 @@ static int amd64_get_reg_class_alignment(const arch_register_class_t *cls) static void amd64_lower_for_target(void) { + size_t i, n_irgs = get_irp_n_irgs(); + /* lower compound param handling */ lower_calls_with_compounds(LF_RETURN_HIDDEN); + + for (i = 0; i < n_irgs; ++i) { + ir_graph *irg = get_irp_irg(i); + /* Turn all small CopyBs into loads/stores, and turn all bigger + * CopyBs into memcpy calls, because we cannot handle CopyB nodes + * during code generation yet. + * TODO: Adapt this once custom CopyB handling is implemented. */ + lower_CopyB(irg, 64, 65, 4); + } } static int amd64_is_mux_allowed(ir_node *sel, ir_node *mux_false, diff --git a/ir/be/arm/bearch_arm.c b/ir/be/arm/bearch_arm.c index 6c32ca000..0f2f06823 100644 --- a/ir/be/arm/bearch_arm.c +++ b/ir/be/arm/bearch_arm.c @@ -542,6 +542,14 @@ static void arm_lower_for_target(void) ir_graph *irg = get_irp_irg(i); lower_switch(irg, 4, 256, true); } + + for (i = 0; i < n_irgs; ++i) { + ir_graph *irg = get_irp_irg(i); + /* Turn all small CopyBs into loads/stores and all bigger CopyBs into + * memcpy calls. + * TODO: These constants need arm-specific tuning. 
*/ + lower_CopyB(irg, 31, 32, 4); + } } /** diff --git a/ir/be/ia32/bearch_ia32.c b/ir/be/ia32/bearch_ia32.c index 030b6af31..efcf897be 100644 --- a/ir/be/ia32/bearch_ia32.c +++ b/ir/be/ia32/bearch_ia32.c @@ -2049,6 +2049,14 @@ static void ia32_lower_for_target(void) /* break up switches with wide ranges */ lower_switch(irg, 4, 256, false); } + + for (i = 0; i < n_irgs; ++i) { + ir_graph *irg = get_irp_irg(i); + /* Turn all small CopyBs into loads/stores, keep medium-sized CopyBs, + * so we can generate rep movs later, and turn all big CopyBs into + * memcpy calls. */ + lower_CopyB(irg, 64, 8193, 4); + } } /** diff --git a/ir/be/sparc/bearch_sparc.c b/ir/be/sparc/bearch_sparc.c index 2f1920cf8..6feb3f78c 100644 --- a/ir/be/sparc/bearch_sparc.c +++ b/ir/be/sparc/bearch_sparc.c @@ -420,6 +420,7 @@ static void sparc_lower_for_target(void) sparc_create_set, 0, }; + lower_calls_with_compounds(LF_RETURN_HIDDEN); if (sparc_isa_template.fpu_arch == SPARC_FPU_ARCH_SOFTFLOAT) @@ -434,6 +435,13 @@ static void sparc_lower_for_target(void) ir_lower_mode_b(irg, &lower_mode_b_config); lower_switch(irg, 4, 256, false); } + + for (i = 0; i < n_irgs; ++i) { + ir_graph *irg = get_irp_irg(i); + /* Turn all small CopyBs into loads/stores and all bigger CopyBs into + * memcpy calls. */ + lower_CopyB(irg, 31, 32, 4); + } } static int sparc_is_mux_allowed(ir_node *sel, ir_node *mux_false, diff --git a/ir/lower/lower_copyb.c b/ir/lower/lower_copyb.c index d18afa73a..f35702071 100644 --- a/ir/lower/lower_copyb.c +++ b/ir/lower/lower_copyb.c @@ -19,8 +19,8 @@ /** * @file - * @brief Lower small CopyB nodes into a series of Load/store - * @author Michael Beck, Matthias Braun + * @brief Lower small CopyB nodes into a series of Load/Store nodes + * @author Michael Beck, Matthias Braun, Manuel Mohr * @version $Id$ */ #include "config.h" @@ -42,10 +42,53 @@ struct entry { ir_node *copyb; }; +/** + * Every CopyB is assigned a size category as follows: + * - 'small' iff size <= max_small_size, + * - 'medium' iff max_small_size < size < min_large_size, + * - 'large' iff size >= min_large_size. + * + * The idea is that each backend can apply different optimizations in each + * of the three categories. + * + * For small CopyBs, the x86 backend could, e.g., emit a single SSE + * instruction to copy 16 bytes. Other backends might just go with a series + * of Load/Stores. Therefore, x86 would like to keep the small CopyB nodes + * around whereas other backends would not. + * For medium-sized CopyBs, the x86 backend might generate a rep-prefixed mov + * instruction. Hence, it also wants to keep the CopyBs in these cases. Other + * backends might handle this differently. + * For large CopyBs, a call to memcpy is worth the call overhead, so large + * CopyBs should always be lowered to memcpy calls. + * + * The lowerer performs the following actions if the CopyB is + * - 'small': Replace it with a series of Loads/Stores + * - 'medium': Nothing. + * - 'large': Replace it with a call to memcpy. + * + * max_small_size and min_large_size allow for a flexible configuration. + * For example, one backend could specify max_small_size == 0 and + * min_large_size == 8192 to keep all CopyB nodes smaller than 8192 and get + * memcpy Calls for all others. Here, the set of small CopyBs is empty. + * Another backend could specify max_small_size == 63 and min_large_size == 64 + * to lower all small CopyBs to Loads/Stores and all big CopyBs to memcpy. 
+ * Hence, the set of medium-sized CopyBs is empty and this backend never + * sees a CopyB node at all. + * If memcpy is not available, min_large_size can be set to UINT_MAX to prevent + * the creation of calls to memcpy. Note that CopyBs whose size is UINT_MAX + * will still be lowered to memcpy calls because we check if the size is greater + * *or equal* to min_large_size. However, this should never occur in practice. + */ + +static unsigned max_small_size; /**< The maximum size of a CopyB node + so that it is regarded as 'small'. */ +static unsigned min_large_size; /**< The minimum size of a CopyB node + so that it is regarded as 'large'. */ + typedef struct walk_env { - unsigned max_size; - struct obstack obst; /**< the obstack where data is allocated on */ - struct list_head list; /**< the list of copyb nodes */ + struct obstack obst; /**< the obstack where data is allocated + on. */ + struct list_head list; /**< the list of copyb nodes. */ } walk_env_t; static ir_mode *get_ir_mode(unsigned bytes) @@ -62,20 +105,20 @@ static ir_mode *get_ir_mode(unsigned bytes) } /** - * lower a CopyB node. + * Turn a small CopyB node into a series of Load/Store nodes. */ -static void lower_copyb_nodes(ir_node *irn, unsigned mode_bytes) +static void lower_small_copyb_node(ir_node *irn, unsigned mode_bytes) { - ir_graph *irg = get_irn_irg(irn); - unsigned size; - unsigned offset; - ir_mode *mode; - ir_mode *addr_mode; - ir_node *mem; - ir_node *addr_src; - ir_node *addr_dst; - ir_node *block; - ir_type *tp; + ir_graph *irg = get_irn_irg(irn); + unsigned size; + unsigned offset; + ir_mode *mode; + ir_mode *addr_mode; + ir_node *mem; + ir_node *addr_src; + ir_node *addr_dst; + ir_node *block; + ir_type *tp; addr_src = get_CopyB_src(irn); addr_dst = get_CopyB_dst(irn); @@ -124,8 +167,76 @@ static void lower_copyb_nodes(ir_node *irn, unsigned mode_bytes) set_Tuple_pred(irn, pn_CopyB_X_except, new_r_Bad(irg, mode_X)); } +static ir_type *get_memcpy_methodtype() +{ + ir_type *tp = new_type_method(3, 1); + + set_method_param_type(tp, 0, get_type_for_mode(mode_P)); + set_method_param_type(tp, 1, get_type_for_mode(mode_P)); + set_method_param_type(tp, 2, get_type_for_mode(mode_Lu)); + set_method_res_type (tp, 0, get_type_for_mode(mode_P)); + + return tp; +} + +static ir_node *get_memcpy_symconst(ir_graph *irg) +{ + ident *id = new_id_from_str("memcpy"); + ir_type *mt = get_memcpy_methodtype(); + ir_entity *ent = new_entity(get_glob_type(), id, mt); + symconst_symbol sym; + + set_entity_ld_ident(ent, get_entity_ident(ent)); + sym.entity_p = ent; + + return new_r_SymConst(irg, mode_P_code, sym, symconst_addr_ent); +} + +/** + * Turn a large CopyB node into a memcpy call. 
+ */ +static void lower_large_copyb_node(ir_node *irn) +{ + ir_graph *irg = get_irn_irg(irn); + ir_node *block = get_nodes_block(irn); + dbg_info *dbgi = get_irn_dbg_info(irn); + ir_node *mem = get_CopyB_mem(irn); + ir_node *addr_src = get_CopyB_src(irn); + ir_node *addr_dst = get_CopyB_dst(irn); + ir_type *copyb_tp = get_CopyB_type(irn); + unsigned size = get_type_size_bytes(copyb_tp); + + ir_node *symconst = get_memcpy_symconst(irg); + ir_type *call_tp = get_memcpy_methodtype(); + ir_node *in[3]; + ir_node *call; + ir_node *call_mem; + + in[0] = addr_dst; + in[1] = addr_src; + in[2] = new_r_Const_long(irg, mode_Lu, size); + call = new_rd_Call(dbgi, block, mem, symconst, 3, in, call_tp); + call_mem = new_r_Proj(call, mode_M, pn_Call_M); + + turn_into_tuple(irn, 1); + set_irn_n(irn, pn_CopyB_M, call_mem); +} + +static void lower_copyb_node(ir_node *irn, unsigned native_mode_bytes) +{ + ir_type *tp = get_CopyB_type(irn); + unsigned size = get_type_size_bytes(tp); + + if (size <= max_small_size) + lower_small_copyb_node(irn, native_mode_bytes); + else if (size >= min_large_size) + lower_large_copyb_node(irn); + else + assert(!"CopyB of invalid size handed to lower_copyb_node"); +} + /** - * Post-Walker: find small CopyB nodes. + * Post-Walker: find CopyB nodes. */ static void find_copyb_nodes(ir_node *irn, void *ctx) { @@ -133,6 +244,7 @@ static void find_copyb_nodes(ir_node *irn, void *ctx) ir_type *tp; unsigned size; entry_t *entry; + bool medium_sized; if (is_Proj(irn)) { ir_node *pred = get_Proj_pred(irn); @@ -152,11 +264,12 @@ static void find_copyb_nodes(ir_node *irn, void *ctx) if (get_type_state(tp) != layout_fixed) return; - size = get_type_size_bytes(tp); - if (size > env->max_size) - return; + size = get_type_size_bytes(tp); + medium_sized = max_small_size < size && size < min_large_size; + if (medium_sized) + return; /* Nothing to do for medium-sized CopyBs. */ - /* ok, link it in */ + /* Okay, either small or large CopyB, so link it in and lower it later. */ entry = OALLOC(&env->obst, entry_t); entry->copyb = irn; INIT_LIST_HEAD(&entry->list); @@ -164,18 +277,21 @@ static void find_copyb_nodes(ir_node *irn, void *ctx) list_add_tail(&entry->list, &env->list); } -void lower_CopyB(ir_graph *irg, unsigned max_size, unsigned native_mode_bytes) +void lower_CopyB(ir_graph *irg, unsigned max_small_sz, + unsigned min_large_sz, unsigned native_mode_bytes) { walk_env_t env; entry_t *entry; + assert(max_small_sz < min_large_sz && "CopyB size ranges must not overlap"); obstack_init(&env.obst); - env.max_size = max_size; + max_small_size = max_small_sz; + min_large_size = min_large_sz; INIT_LIST_HEAD(&env.list); irg_walk_graph(irg, NULL, find_copyb_nodes, &env); list_for_each_entry(entry_t, entry, &env.list, list) { - lower_copyb_nodes(entry->copyb, native_mode_bytes); + lower_copyb_node(entry->copyb, native_mode_bytes); } obstack_free(&env.obst, NULL);
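
The interface change above is driven entirely by the two size thresholds passed to lower_CopyB(). As a minimal sketch (not part of this patch), a backend's lower_for_target hook would wire it up the same way the amd64/arm/ia32/sparc hunks do; the function name and the constants 16, 1024 and 4 below are made-up placeholders that would need target-specific tuning:

/* Hypothetical example of using the new lower_CopyB() interface. */
static void example_lower_for_target(void)
{
	size_t i, n_irgs = get_irp_n_irgs();

	for (i = 0; i < n_irgs; ++i) {
		ir_graph *irg = get_irp_irg(i);
		/* CopyBs of size <= 16 bytes become Load/Store sequences,
		 * sizes 17..1023 stay as CopyB nodes for the backend to handle,
		 * and sizes >= 1024 become calls to memcpy. */
		lower_CopyB(irg, 16, 1024, 4);
	}
}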
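
The size classification documented in lower_copyb.c and applied by find_copyb_nodes()/lower_copyb_node() can be summarised by the following standalone helper; it is given for illustration only and does not exist in libfirm:

typedef enum { COPYB_SMALL, COPYB_MEDIUM, COPYB_LARGE } copyb_class_t;

static copyb_class_t classify_copyb(unsigned size, unsigned max_small_size,
                                    unsigned min_large_size)
{
	if (size <= max_small_size)
		return COPYB_SMALL;   /* lowered to a Load/Store sequence */
	if (size >= min_large_size)
		return COPYB_LARGE;   /* lowered to a call to memcpy */
	return COPYB_MEDIUM;      /* kept as a CopyB node for the backend */
}

With the ia32 settings above (max_small_size = 64, min_large_size = 8193), a 64-byte CopyB is small, a 100-byte CopyB is medium-sized (a rep movs candidate), and an 8193-byte CopyB is large and becomes a memcpy call.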
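
For reference, the method type built by get_memcpy_methodtype() models the standard C prototype

	void *memcpy(void *dest, const void *src, size_t n);

with mode_Lu used for the length argument, so lower_large_copyb_node() effectively rewrites a large CopyB(dst, src) of type size N into the equivalent of

	memcpy(dst, src, N);

and reroutes the CopyB's memory Proj to the memory result of the new Call.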