2 * Copyright (C) 1995-2007 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Lower small CopyB nodes into a series of Load/Store nodes
23 * @author Michael Beck, Matthias Braun, Manuel Mohr
/* List entry recording one CopyB node scheduled for lowering.
 * NOTE(review): the body of `struct entry` is only partially visible in this
 * chunk — the `struct entry {` opener and the field holding the CopyB node
 * (referenced later as entry->copyb) are on lines not shown here. */
40 typedef struct entry entry_t;
/* Intrusive list link; chained into walk_env.list by find_copyb_nodes(). */
42 struct list_head list;
47 * Every CopyB is assigned a size category as follows:
48 * - 'small' iff size <= max_small_size,
49 * - 'medium' iff max_small_size < size < min_large_size,
50 * - 'large' iff size >= min_large_size.
52 * The idea is that each backend can apply different optimizations in each
53 * of the three categories.
55 * For small CopyBs, the x86 backend could, e.g., emit a single SSE
56 * instruction to copy 16 bytes. Other backends might just go with a series
57 * of Load/Stores. Therefore, x86 would like to keep the small CopyB nodes
58 * around whereas other backends would not.
59 * For medium-sized CopyBs, the x86 backend might generate a rep-prefixed mov
60 * instruction. Hence, it also wants to keep the CopyBs in these cases. Other
61 * backends might handle this differently.
62 * For large CopyBs, a call to memcpy is worth the call overhead, so large
63 * CopyBs should always be lowered to memcpy calls.
65 * The lowerer performs the following actions if the CopyB is
66 * - 'small': Replace it with a series of Loads/Stores
67 * - 'medium': Nothing.
68 * - 'large': Replace it with a call to memcpy.
70 * max_small_size and min_large_size allow for a flexible configuration.
71 * For example, one backend could specify max_small_size == 0 and
72 * min_large_size == 8192 to keep all CopyB nodes smaller than 8192 and get
73 * memcpy Calls for all others. Here, the set of small CopyBs is empty.
74 * Another backend could specify max_small_size == 63 and min_large_size == 64
75 * to lower all small CopyBs to Loads/Stores and all big CopyBs to memcpy.
76 * Hence, the set of medium-sized CopyBs is empty and this backend never
77 * sees a CopyB node at all.
78 * If memcpy is not available, min_large_size can be set to UINT_MAX to prevent
79 * the creation of calls to memcpy. Note that CopyBs whose size is UINT_MAX
80 * will still be lowered to memcpy calls because we check if the size is greater
81 * *or equal* to min_large_size. However, this should never occur in practice.
84 static unsigned max_small_size; /**< The maximum size of a CopyB node
85 so that it is regarded as 'small'. */
86 static unsigned min_large_size; /**< The minimum size of a CopyB node
87 so that it is regarded as 'large'. */
88 static unsigned native_mode_bytes; /**< The size of the native mode in bytes. */
89 static int allow_misalignments; /**< Whether backend can handle misaligned
accesses (nonzero = allowed); see lower_small_copyb_node(). */
/* Environment passed through the graph walk: owns the obstack backing the
 * entry_t allocations and the head of the collected CopyB list.
 * NOTE(review): the closing `} walk_env_t;` line is not visible in this chunk. */
92 typedef struct walk_env {
93 struct obstack obst; /**< the obstack where data is allocated on. */
95 struct list_head list; /**< the list of copyb nodes. */
/**
 * Returns the unsigned integer mode that is exactly @p mode_bytes wide.
 * Only the sizes used by the copy lowering (1, 2, 4, 8, 16) are supported;
 * any other size aborts via panic().
 */
98 static ir_mode *get_ir_mode(unsigned mode_bytes)
100 switch (mode_bytes) {
101 case 1: return mode_Bu;
102 case 2: return mode_Hu;
103 case 4: return mode_Iu;
104 case 8: return mode_Lu;
105 case 16: return mode_LLu;
/* No mode of the requested width exists: hard error rather than a wrong copy. */
107 panic("unexpected mode size requested in copyb lowering");
112 * Turn a small CopyB node into a series of Load/Store nodes.
114 static void lower_small_copyb_node(ir_node *irn)
116 ir_graph *irg = get_irn_irg(irn);
117 ir_node *block = get_nodes_block(irn);
118 ir_type *tp = get_CopyB_type(irn);
119 ir_node *addr_src = get_CopyB_src(irn);
120 ir_node *addr_dst = get_CopyB_dst(irn);
121 ir_node *mem = get_CopyB_mem(irn);
122 ir_mode *addr_mode = get_irn_mode(addr_src);
/* If misaligned accesses are allowed, copy in native-word chunks; otherwise
 * the chunk width is limited by the copied type's alignment. */
123 unsigned mode_bytes = allow_misalignments ? native_mode_bytes : tp->align;
124 unsigned size = get_type_size_bytes(tp);
/* Copy in chunks of decreasing width until the whole type is covered.
 * NOTE(review): the declarations of offset/mode/addr_const/add/load/... and
 * the step that halves mode_bytes for the remainder are on lines not shown
 * in this chunk. */
128 while (offset < size) {
129 mode = get_ir_mode(mode_bytes);
130 for (; offset + mode_bytes <= size; offset += mode_bytes) {
131 /* construct offset */
/* Load one chunk from src + offset ... */
140 addr_const = new_r_Const_long(irg, mode_Iu, offset);
141 add = new_r_Add(block, addr_src, addr_const, addr_mode);
143 load = new_r_Load(block, mem, add, mode, cons_none);
144 load_res = new_r_Proj(load, mode, pn_Load_res);
145 load_mem = new_r_Proj(load, mode_M, pn_Load_M);
/* ... and store it to dst + offset, threading the memory chain through. */
147 addr_const = new_r_Const_long(irg, mode_Iu, offset);
148 add = new_r_Add(block, addr_dst, addr_const, addr_mode);
150 store = new_r_Store(block, load_mem, add, load_res, cons_none);
151 store_mem = new_r_Proj(store, mode_M, pn_Store_M);
/* Replace the CopyB by a Tuple so all Projs on it are rerouted: the memory
 * result becomes the final chain (presumably mem = store_mem after the loop,
 * on a line not shown — verify), the control-flow Projs become Bad. */
159 turn_into_tuple(irn, pn_CopyB_max+1);
160 set_Tuple_pred(irn, pn_CopyB_M, mem);
161 set_Tuple_pred(irn, pn_CopyB_X_regular, new_r_Bad(irg, mode_X));
162 set_Tuple_pred(irn, pn_CopyB_X_except, new_r_Bad(irg, mode_X));
/**
 * Builds the method type of memcpy:
 *   void *memcpy(void *dst, const void *src, size_t n)
 * i.e. 3 parameters (two pointers, one size in the native word mode) and
 * one pointer result.
 */
165 static ir_type *get_memcpy_methodtype(void)
167 ir_type *tp = new_type_method(3, 1);
/* size_t is modelled as the unsigned mode of native word width. */
168 ir_mode *size_t_mode = get_ir_mode(native_mode_bytes);
170 set_method_param_type(tp, 0, get_type_for_mode(mode_P));
171 set_method_param_type(tp, 1, get_type_for_mode(mode_P));
172 set_method_param_type(tp, 2, get_type_for_mode(size_t_mode));
173 set_method_res_type (tp, 0, get_type_for_mode(mode_P));
/**
 * Returns a SymConst node for the address of the compiler-library entity
 * "memcpy" in graph @p irg, usable as the callee of a Call node.
 * NOTE(review): the initialization of `sym` from `ent` is on a line not
 * shown in this chunk.
 */
178 static ir_node *get_memcpy_symconst(ir_graph *irg)
180 ident *id = new_id_from_str("memcpy");
181 ir_type *mt = get_memcpy_methodtype();
182 ir_entity *ent = create_compilerlib_entity(id, mt);
186 return new_r_SymConst(irg, mode_P_code, sym, symconst_addr_ent);
190 * Turn a large CopyB node into a memcpy call.
192 static void lower_large_copyb_node(ir_node *irn)
194 ir_graph *irg = get_irn_irg(irn);
195 ir_node *block = get_nodes_block(irn);
196 dbg_info *dbgi = get_irn_dbg_info(irn);
197 ir_node *mem = get_CopyB_mem(irn);
198 ir_node *addr_src = get_CopyB_src(irn);
199 ir_node *addr_dst = get_CopyB_dst(irn);
200 ir_type *copyb_tp = get_CopyB_type(irn);
201 unsigned size = get_type_size_bytes(copyb_tp);
203 ir_node *symconst = get_memcpy_symconst(irg);
204 ir_type *call_tp = get_memcpy_methodtype();
205 ir_mode *mode_size_t = get_ir_mode(native_mode_bytes);
/* Build memcpy(dst, src, size); in[0]/in[1] are presumably set to addr_dst/
 * addr_src on lines not shown in this chunk — verify argument order. */
212 in[2] = new_r_Const_long(irg, mode_size_t, size);
213 call = new_rd_Call(dbgi, block, mem, symconst, 3, in, call_tp);
214 call_mem = new_r_Proj(call, mode_M, pn_Call_M);
/* Reroute the CopyB's memory Proj to the call's memory result. */
216 turn_into_tuple(irn, 1);
217 set_irn_n(irn, pn_CopyB_M, call_mem);
/**
 * Dispatches one collected CopyB node to the matching lowering:
 * small (size <= max_small_size) -> Load/Store series,
 * large (size >= min_large_size) -> memcpy call.
 * Medium-sized CopyBs are never put on the work list (see find_copyb_nodes),
 * so reaching the fall-through branch is a bug.
 */
220 static void lower_copyb_node(ir_node *irn)
222 ir_type *tp = get_CopyB_type(irn);
223 unsigned size = get_type_size_bytes(tp);
225 if (size <= max_small_size)
226 lower_small_copyb_node(irn);
227 else if (size >= min_large_size)
228 lower_large_copyb_node(irn);
230 assert(!"CopyB of invalid size handed to lower_copyb_node");
234 * Post-Walker: find CopyB nodes.
236 static void find_copyb_nodes(ir_node *irn, void *ctx)
238 walk_env_t *env = (walk_env_t*)ctx;
/* A non-memory Proj (X_regular/X_except) hanging off a CopyB means the copy
 * cannot simply be replaced; drop the previously recorded entry again.
 * NOTE(review): the is_Proj(irn) guard appears to be on a line not shown. */
245 ir_node *pred = get_Proj_pred(irn);
247 if (is_CopyB(pred) && get_Proj_proj(irn) != pn_CopyB_M) {
248 /* found an exception Proj: remove it from the list again */
249 entry = (entry_t*)get_irn_link(pred);
250 list_del(&entry->list);
/* Only types with a fixed layout have a known size to lower by. */
258 tp = get_CopyB_type(irn);
259 if (get_type_state(tp) != layout_fixed)
262 size = get_type_size_bytes(tp);
263 medium_sized = max_small_size < size && size < min_large_size;
265 return; /* Nothing to do for medium-sized CopyBs. */
267 /* Okay, either small or large CopyB, so link it in and lower it later. */
268 entry = OALLOC(&env->obst, entry_t);
270 INIT_LIST_HEAD(&entry->list);
/* Link entry to the node so a later exception Proj can find and unlink it. */
271 set_irn_link(irn, entry);
272 list_add_tail(&entry->list, &env->list);
/**
 * Lowers all small/large CopyB nodes of @p irg (public entry point of this
 * pass): collects candidates with a graph walk, then lowers each one.
 * @param irg           the graph to transform
 * @param max_small_sz  sizes <= this are lowered to Load/Store series
 * @param min_large_sz  sizes >= this are lowered to memcpy calls
 * The two thresholds must not overlap (asserted below); sizes strictly
 * between them are left untouched for the backend.
 */
275 void lower_CopyB(ir_graph *irg, unsigned max_small_sz, unsigned min_large_sz,
278 const backend_params *bparams = be_get_backend_param();
282 assert(max_small_sz < min_large_sz && "CopyB size ranges must not overlap");
/* Publish the configuration in the module-level globals used by the helpers. */
284 max_small_size = max_small_sz;
285 min_large_size = min_large_sz;
286 native_mode_bytes = bparams->machine_size / 8;
287 allow_misalignments = allow_misaligns;
/* Phase 1: collect all small/large CopyBs into env.list ... */
289 obstack_init(&env.obst);
290 INIT_LIST_HEAD(&env.list);
291 irg_walk_graph(irg, NULL, find_copyb_nodes, &env);
/* ... phase 2: lower them, then release the entries in one go. */
293 list_for_each_entry(entry_t, entry, &env.list, list) {
294 lower_copyb_node(entry->copyb);
297 obstack_free(&env.obst, NULL);