2 * This file is part of libFirm.
3 * Copyright (C) 2012 University of Karlsruhe.
8 * @brief Procedure cloning.
9 * @author Beyhan Veliev, Michael Beck
12 * The first step is to find and analyze functions that are called
13 * with constant parameter(s).
14 * The second step is to optimize the functions found by this
15 * analysis. Optimizing means creating a new function that keeps only the
16 * parameters that are not constant. The constant parameters are placed
17 * directly in the function graph instead of being passed as parameters.
23 #include "iroptimize.h"
29 #include "analyze_irg_args.h"
40 * This struct contains the information quadruple for a Call, which we need to
41 * decide if this function must be cloned.
/* Describes one cloning candidate: the callee entity, the argument position
 * and the constant passed there, plus every Call that matches. Only ent, pos
 * and tv are looked at by entry_cmp() and hash_entry(); calls is payload. */
43 typedef struct quadruple {
44 ir_entity *ent; /**< The entity of our Call. */
45 size_t pos; /**< Position of a constant argument of our Call. */
46 ir_tarval *tv; /**< The tarval of this argument if Const node. */
47 ir_node **calls; /**< The list of all calls with the same characteristics; created with NEW_ARR_F and grown with ARR_APP1 in process_call(). */
51 * The quadruplets are held in a sorted list
/* Element stored in q_set.map: the quadruple, its cached heuristic weight and
 * the link that chains it into q_set.heavy_uses. */
53 typedef struct entry {
54 quadruple_t q; /**< the quadruple */
55 float weight; /**< its weight, as computed by calculate_weight() */
56 struct entry *next; /**< link to the next one in the heavy_uses list */
/* Working state of the pass: entry storage, lookup set and work list. */
59 typedef struct q_set {
60 struct obstack obst; /**< an obstack containing all entries */
61 pset *map; /**< a hash map containing the quadruples; created in process_call() */
62 entry_t *heavy_uses; /**< the ordered list of heavy uses (heaviest entry first, see proc_cloning()) */
66 * Compare two quadruplets.
68 * @return zero if they are identical, non-zero otherwise
/* Comparison callback handed to new_pset() in process_call(). Both arguments
 * are treated as entry_t pointers; calls, weight and next are not compared. */
70 static int entry_cmp(const void *elt, const void *key)
72 const entry_t *e1 = (const entry_t*)elt;
73 const entry_t *e2 = (const entry_t*)key;
/* 0 only when entity, position and tarval all match. Entity and tarval are
 * compared as pointers, not by content. */
75 return (e1->q.ent != e2->q.ent) || (e1->q.pos != e2->q.pos) || (e1->q.tv != e2->q.tv);
79 * Hash an element of type entry_t.
81 * @param entry The element to be hashed.
/* Hashes exactly the three fields that entry_cmp() compares (ent, tv, pos),
 * so entries that compare equal also hash equal. */
83 static unsigned hash_entry(const entry_t *entry)
/* XOR of both pointer hashes with pos * 9, the latter truncated to unsigned. */
85 return hash_ptr(entry->q.ent) ^ hash_ptr(entry->q.tv) ^ (unsigned)(entry->q.pos * 9);
89 * Free memory associated with a quadruplet.
/* Releases the calls array of an entry and resets the pointer to NULL.
 * The entry_t itself is not released here. */
91 static void kill_entry(entry_t *entry)
94 DEL_ARR_F(entry->q.calls);
95 entry->q.calls = NULL;
100 * Process a call node.
102 * @param call A ir_node to be checked.
103 * @param callee The entity of the callee
104 * @param hmap The quadruple-set containing the calls with constant parameters
/*
 * Registers `call` in hmap once for every Const argument it passes. The key
 * is (callee, argument index, tarval of the Const); the call is added to the
 * calls array of the set entry returned for that key.
 */
106 static void process_call(ir_node *call, ir_entity *callee, q_set *hmap)
108 entry_t *key, *entry;
112 n_params = get_Call_n_params(call);
115 * Beware: we cannot clone variadic parameters as well as the
116 * last non-variadic one, which might be needed for the va_start()
120 /* In this for loop we collect the calls, that have
121 a constant parameter. */
/* Counts down from n_params; the pre-decrement in the body yields the indices
 * n_params-1 .. 0, so the counter never has to go below zero (relevant if it
 * is unsigned, like quadruple.pos). */
122 for (i = n_params; i > 0;) {
123 call_param = get_Call_param(call, --i);
124 if (is_Const(call_param)) {
125 /* we have found a Call to collect and we save the informations,
128 hmap->map = new_pset(entry_cmp, 8);
130 key = OALLOC(&hmap->obst, entry_t);
134 key->q.tv = get_Const_tarval(call_param);
139 /* We insert our information in the set, where we collect the calls.*/
140 entry = (entry_t*)pset_insert(hmap->map, key, hash_entry(key));
/* The temporary key is handed back to the obstack. NOTE(review): presumably
 * this only happens when pset_insert() returned an already existing entry
 * rather than key itself -- confirm the guarding condition. */
143 obstack_free(&hmap->obst, key);
145 /* add the call to the list */
/* The first call for an entry creates a one-element array; later calls are
 * appended to it. */
146 if (! entry->q.calls) {
147 entry->q.calls = NEW_ARR_F(ir_node *, 1);
148 entry->q.calls[0] = call;
150 ARR_APP1(ir_node *, entry->q.calls, call);
156 * Collect all calls in a ir_graph to a set.
158 * @param call A ir_node to be checked.
159 * @param env The quadruple-set containing the calls with constant parameters
/*
 * Graph-walker callback, installed through irg_walk_graph() in proc_cloning()
 * with the q_set as environment. A node reaches process_call() only if its
 * call pointer is a SymConst entity address, the entity is not weakly linked
 * and the entity has a graph.
 * NOTE(review): presumably every failed check returns early -- confirm.
 */
161 static void collect_irg_calls(ir_node *call, void *env)
163 q_set *hmap = (q_set*)env;
167 /* We collect just "Call" nodes */
169 call_ptr = get_Call_ptr(call);
171 if (! is_SymConst_addr_ent(call_ptr))
174 callee = get_SymConst_entity(call_ptr);
176 /* we don't know which function gets finally bound to a weak symbol */
177 if (get_entity_linkage(callee) & IR_LINKAGE_WEAK)
180 /* we can only clone calls to existing entities */
181 if (get_entity_irg(callee) == NULL)
184 process_call(call, callee, hmap);
189 * Make a name for a clone. The clone name is
190 * the name of the original method suffixed with "_cl_pos_nr".
191 * pos is the pos from our quadruplet and nr is a counter.
193 * @param id The ident of the cloned function.
194 * @param pos The "pos" from our quadruplet.
195 * @param nr A counter for the clones.
/* Returns id with the suffix "_cl_<pos>_<nr>" appended via id_mangle(). */
197 static ident *get_clone_ident(ident *id, size_t pos, size_t nr)
199 char clone_postfix[32];
/* The write is bounded by sizeof(clone_postfix). NOTE(review): two 20-digit
 * size_t values would not fit into 32 bytes; assuming ir_snprintf truncates
 * like snprintf, such a suffix would be cut short -- confirm this is
 * acceptable. */
201 ir_snprintf(clone_postfix, sizeof(clone_postfix), "_cl_%zu_%zu", pos, nr);
203 return id_mangle(id, new_id_from_str(clone_postfix));
207 * Pre-Walker: Copies blocks and nodes from the original method graph
208 * to the cloned graph. Fixes the argument projection numbers for
209 * all arguments behind the removed one.
211 * @param irn A node from the original method graph.
212 * @param env The clone graph.
/*
 * Pre-walker body: copies irn into the clone graph passed as env. For Projs
 * of the argument tuple that lie behind the replaced argument, the projection
 * number of the copy is lowered by one.
 */
214 static void copy_nodes(ir_node *irn, void *env)
216 ir_graph *clone_irg = (ir_graph*)env;
/* create_clone_proc_irg() stored the replaced argument Proj in the link of
 * the clone graph; its predecessor is the argument tuple. */
217 ir_node *arg = (ir_node*)get_irg_link(clone_irg);
218 ir_node *irg_args = get_Proj_pred(arg);
222 /* Copy all nodes except the arg. */
224 copy_irn_to_irg(irn, clone_irg);
/* The link of irn leads to its counterpart in the clone (presumably set by
 * copy_irn_to_irg(); for arg it is the Const set in create_clone_proc_irg()). */
226 irn_copy = (ir_node*)get_irn_link(irn);
228 /* Fix argument numbers */
229 if (is_Proj(irn) && get_Proj_pred(irn) == irg_args) {
230 proj_nr = get_Proj_proj(irn);
231 if (get_Proj_proj(arg) < proj_nr)
232 set_Proj_proj(irn_copy, proj_nr - 1);
237 * Post-walker: Set the predecessors of the copied nodes.
238 * The copied nodes are set as link of their original nodes. The links of
239 * "irn" predecessors are the predecessors of copied node.
/*
 * Post-walker body: connects the copy of irn (reached through the link of
 * irn) to the copies of irn's inputs. Blocks, the End node and all other
 * nodes are treated differently; NOTE(review): the branch conditions that
 * select between these cases should be confirmed against the full function.
 */
241 static void set_preds(ir_node *irn, void *env)
243 ir_graph *clone_irg = (ir_graph*)env;
244 ir_node *arg = (ir_node*)get_irg_link(clone_irg);
249 /* Arg is the method argument, that we have replaced by a constant.*/
253 irn_copy = (ir_node*)get_irn_link(irn);
/* Block handling: each control-flow predecessor is replaced by its copy. */
256 ir_graph *const irg = get_Block_irg(irn);
257 for (i = get_Block_n_cfgpreds(irn) - 1; i >= 0; --i) {
258 pred = get_Block_cfgpred(irn, i);
259 /* "End" block must be handled extra, because it is not matured.*/
260 if (get_irg_end_block(irg) == irn)
261 add_immBlock_pred(get_irg_end_block(clone_irg), (ir_node*)get_irn_link(pred));
263 set_Block_cfgpred(irn_copy, i, (ir_node*)get_irn_link(pred));
266 /* First we set the block our copy if it is not a block.*/
267 set_nodes_block(irn_copy, (ir_node*)get_irn_link(get_nodes_block(irn)));
269 /* Handle the keep-alives. This must be done separately, because
270 the End node was NOT copied */
271 for (i = 0; i < get_End_n_keepalives(irn); ++i)
272 add_End_keepalive(irn_copy, (ir_node*)get_irn_link(get_End_keepalive(irn, i)));
/* Generic handling: input i of the copy becomes the copy of input i. */
274 for (i = get_irn_arity(irn) - 1; i >= 0; i--) {
275 pred = get_irn_n(irn, i);
276 set_irn_n(irn_copy, i, (ir_node*)get_irn_link(pred));
283 * Get the method argument at the position "pos".
285 * @param irg irg that must be cloned.
286 * @param pos The position of the argument.
/*
 * Searches the out edges of the graph's argument node for the Proj whose
 * projection number equals pos and asserts that one was found.
 */
288 static ir_node *get_irg_arg(ir_graph *irg, size_t pos)
290 ir_node *irg_args = get_irg_args(irg), *arg = NULL;
292 /* Call algorithm that computes the out edges */
293 assure_irg_outs(irg);
295 /* Search the argument with the number pos.*/
/* Scans the out edges from last to first; "i-- > 0" is the countdown form
 * that is safe for an unsigned index. */
296 for (unsigned i = get_irn_n_outs(irg_args); i-- > 0; ) {
297 ir_node *proj = get_irn_out(irg_args, i);
/* pos is narrowed to int for the comparison with the projection number. */
298 if ((int)pos == get_Proj_proj(proj)) {
301 * More than one arg node found:
302 * We rely on the fact that only one arg exists, so do
303 * a cheap CSE in this case.
305 set_irn_out(irg_args, i, arg, 0);
311 assert(arg && "Argument not found");
316 * Create a new graph for the clone of the method,
317 * that we want to clone.
319 * @param ent The entity of the method that must be cloned.
320 * @param q Our quadruplet.
/*
 * Builds the graph for the clone entity ent: creates an empty graph, prepares
 * the Const that replaces argument q->pos, copies the original graph with
 * copy_nodes()/set_preds() and finishes construction.
 */
322 static void create_clone_proc_irg(ir_entity *ent, const quadruple_t *q)
324 ir_graph *method_irg, *clone_irg;
325 ir_node *arg, *const_arg;
/* NOTE(review): this is read before new_ir_graph() is called for ent, so it
 * is expected to still yield the graph of the original method -- confirm. */
327 method_irg = get_entity_irg(ent);
329 /* We create the skeleton of the clone irg.*/
330 clone_irg = new_ir_graph(ent, 0);
332 arg = get_irg_arg(get_entity_irg(q->ent), q->pos);
333 /* we will replace the argument in position "q->pos" by this constant. */
334 const_arg = new_r_Const(clone_irg, q->tv);
336 /* args copy in the cloned graph will be the const. */
337 set_irn_link(arg, const_arg);
339 /* Store the arg that will be replaced here, so we can easily detect it. */
340 set_irg_link(clone_irg, arg);
342 /* We copy the blocks and nodes, that must be in
343 the clone graph and set their predecessors. */
/* copy_nodes runs as pre-walker, set_preds as post-walker; clone_irg is the
 * environment for both. */
344 irg_walk_graph(method_irg, copy_nodes, set_preds, clone_irg);
346 /* The "cloned" graph must be matured. */
347 mature_immBlock(get_irg_end_block(clone_irg));
348 irg_finalize_cons(clone_irg);
352 * Create a new method type for our clone (the original type with one
353 * parameter less) and set it as the type of the clone entity.
355 * @param q Contains information for the method to clone.
356 * @param ent The entity of the clone.
/*
 * Gives ent a new method type derived from the type of q->ent: one parameter
 * fewer, the same result types.
 */
359 static void change_entity_type(const quadruple_t *q, ir_entity *ent)
361 ir_type *mtp, *new_mtp, *tp;
362 size_t i, j, n_params, n_ress;
364 mtp = get_entity_type(q->ent);
365 n_params = get_method_n_params(mtp);
366 n_ress = get_method_n_ress(mtp);
368 /* Create the new type for our clone. It must have one parameter
369 less than the original.*/
370 new_mtp = new_type_method(n_params - 1, n_ress);
372 /* We must set the type of the methods parameters.*/
/* i runs over the original parameters, j over the shorter new list. */
373 for (i = j = 0; i < n_params; ++i) {
375 /* This is the position of the argument, that we have
379 tp = get_method_param_type(mtp, i);
380 set_method_param_type(new_mtp, j++, tp);
382 /* Copy the methods result types. */
383 for (i = 0; i < n_ress; ++i) {
384 tp = get_method_res_type(mtp, i);
385 set_method_res_type(new_mtp, i, tp);
387 set_entity_type(ent, new_mtp);
391 * Make a clone of a method.
393 * @param q Contains information for the method to clone.
/*
 * Creates the clone entity for q: a copy of q->ent under a new name, with
 * local visibility, a reduced method type and a graph of its own.
 */
395 static ir_entity *clone_method(const quadruple_t *q)
397 ir_entity *new_entity;
399 /* A counter for the clones.*/
/* Function-local static, so the value persists across calls. */
400 static size_t nr = 0;
402 /* We get a new ident for our clone method.*/
403 clone_ident = get_clone_ident(get_entity_ident(q->ent), q->pos, nr);
404 /* We get our entity for the clone method. */
405 new_entity = copy_entity_name(q->ent, clone_ident);
407 /* a cloned entity is always local */
408 set_entity_visibility(new_entity, ir_visibility_local);
410 /* set a ld name here: Should we mangle this ? */
411 set_entity_ld_ident(new_entity, get_entity_ident(new_entity));
413 /* set a new type here. */
414 change_entity_type(q, new_entity);
416 /* We need now a new ir_graph for our clone method. */
417 create_clone_proc_irg(new_entity, q);
419 /* The "new_entity" don't have this information. */
/* NOTE(review): presumably these per-parameter analysis results were taken
 * over from q->ent by the copy and no longer fit the shortened parameter
 * list -- confirm. */
420 new_entity->attr.mtd_attr.param_access = NULL;
421 new_entity->attr.mtd_attr.param_weight = NULL;
427 * Creates a new "cloned" Call node and return it.
429 * @param call The call that must be cloned.
430 * @param new_entity The entity of the cloned function.
431 * @param pos The position of the replaced parameter of this call.
/*
 * Builds a Call to new_entity in the block of `call`. It reuses the memory
 * input of `call` and passes n_params - 1 arguments, typed with the method
 * type of new_entity.
 */
433 static ir_node *new_cl_Call(ir_node *call, ir_entity *new_entity, size_t pos)
436 size_t i, n_params, new_params = 0;
439 ir_graph *irg = get_irn_irg(call);
440 ir_node *bl = get_nodes_block(call);
/* The call target is the address of the clone entity. */
442 sym.entity_p = new_entity;
443 callee = new_r_SymConst(irg, mode_P_code, sym, symconst_addr_ent);
445 n_params = get_Call_n_params(call);
/* Room for one argument less than the original call has. */
446 NEW_ARR_A(ir_node *, in, n_params - 1);
448 /* we save the parameters of the new call in the array "in" without the
449 * parameter in position "pos", that is replaced with a constant.*/
450 for (i = 0; i < n_params; ++i) {
452 in[new_params++] = get_Call_param(call, i);
454 /* Create and return the new Call. */
455 return new_r_Call(bl, get_Call_mem(call),
456 callee, n_params - 1, in, get_entity_type(new_entity));
460 * Exchange all Calls stored in the quadruplet to Calls of the cloned entity.
462 * @param q The quadruple
463 * @param cloned_ent The entity of the new function that must be called
/*
 * Replaces every Call recorded in q->calls by a Call to cloned_ent that was
 * built with new_cl_Call().
 */
466 static void exchange_calls(quadruple_t *q, ir_entity *cloned_ent)
469 ir_node *new_call, *call;
472 /* We iterate the list of the "call".*/
473 for (i = 0; i < ARR_LEN(q->calls); ++i) {
476 /* A clone exist and the copy of "call" in this
477 * clone graph must be exchanged with new one.*/
478 new_call = new_cl_Call(call, cloned_ent, pos);
/* NOTE(review): reorder_weights() applies skip_Id() to recorded calls because
 * they "might be exchanged", which suggests the old node becomes an Id that
 * forwards to new_call -- confirm. */
479 exchange(call, new_call);
484 * The weight formula:
485 * We save one instruction in every caller and param_weight instructions
/* Heuristic weight of an entry: the number of recorded calls multiplied by
 * (parameter weight of q.ent at position q.pos, plus one), as a float. */
488 static float calculate_weight(const entry_t *entry)
490 return ARR_LEN(entry->q.calls) *
491 (float)(get_method_param_weight(entry->q.ent, entry->q.pos) + 1);
495 * After we exchanged all calls, some entries on the list for
496 * the next cloned entity may get invalid, so we have to check
497 * them and may even update the list of heavy uses.
/*
 * Revalidates the head of hmap->heavy_uses after calls were exchanged. Calls
 * that no longer target the entry's entity are dropped from its list and fed
 * to process_call() again. The weight is then recomputed, and the entry is
 * either removed (no calls left or weight below threshold) or moved further
 * down the list.
 */
499 static void reorder_weights(q_set *hmap, float threshold)
501 entry_t **adr, *p, *entry;
505 entry = hmap->heavy_uses;
509 len = ARR_LEN(entry->q.calls);
510 for (i = 0; i < len; ++i) {
511 ir_node *ptr, *call = entry->q.calls[i];
513 /* might be exchanged, so skip Id nodes here. */
514 call = skip_Id(call);
516 /* we know, that a SymConst is here */
517 ptr = get_Call_ptr(call);
519 ir_entity *const callee = get_SymConst_entity(ptr);
520 if (callee != entry->q.ent) {
522 * This call is already changed because of a previous
523 * optimization. Remove it from the list.
/* Removal by overwriting slot i with the element at index len.
 * NOTE(review): presumably len was decremented just before, so that index is
 * the last live element -- confirm. */
526 entry->q.calls[i] = entry->q.calls[len];
527 entry->q.calls[len] = NULL;
529 /* the new call should be processed */
530 process_call(call, callee, hmap);
535 /* the length might be changed */
536 ARR_SHRINKLEN(entry->q.calls, len);
538 /* recalculate the weight and resort the heavy uses map */
539 entry->weight = calculate_weight(entry);
/* Entry is not worth cloning any more: unlink it from the head of the list. */
541 if (len <= 0 || entry->weight < threshold) {
542 hmap->heavy_uses = entry->next;
545 /* we have changed the list, check the next one */
/* Skips all successors that are heavier than the entry, i.e. looks for the
 * position that keeps the list sorted with the heaviest entry first. */
550 for (p = entry->next; p && entry->weight < p->weight; p = p->next) {
555 hmap->heavy_uses = entry->next;
559 /* we have changed the list, check the next one */
565 * Do the procedure cloning. Evaluate a heuristic weight for every
566 * call(..., Const, ...). If the weight is bigger than threshold,
567 * clone the entity and fix the calls.
/*
 * Driver of the optimization. It collects all candidate calls of every graph
 * into hmap. Then, while there is work, it sorts the set entries with
 * weight >= threshold into hmap.heavy_uses, clones the method of the head
 * entry, redirects its calls and revalidates the list with reorder_weights().
 *
 * @param threshold  minimum weight (see calculate_weight()) an entry needs
 *                   to be put on the heavy uses list
 */
569 void proc_cloning(float threshold)
575 DEBUG_ONLY(firm_dbg_module_t *dbg;)
577 /* register a debug mask */
578 FIRM_DBG_REGISTER(dbg, "firm.opt.proc_cloning");
580 obstack_init(&hmap.obst);
582 hmap.heavy_uses = NULL;
584 /* initially fill our map by visiting all irgs */
585 for (i = 0, n = get_irp_n_irgs(); i < n; ++i) {
586 ir_graph *irg = get_irp_irg(i);
587 irg_walk_graph(irg, collect_irg_calls, NULL, &hmap);
590 /* We have the "Call" nodes to optimize in set "set_entries". Our algorithm
591 replace one constant parameter and make a new "Call" node for all found "Calls". It exchange the
592 old one with the new one and the algorithm is called with the new "Call".
594 while (hmap.map || hmap.heavy_uses) {
595 /* We iterate the set and arrange the element of the set in a list.
596 The elements are arranged dependent of their value descending.*/
598 foreach_pset(hmap.map, entry_t, entry) {
599 entry->weight = calculate_weight(entry);
602 * Do not put entry with a weight < threshold in the list
604 if (entry->weight < threshold) {
609 /* put entry in the heavy uses list */
611 if (! hmap.heavy_uses)
612 hmap.heavy_uses = entry;
/* At least as heavy as the current head: the entry becomes the new head. */
614 if (entry->weight >= hmap.heavy_uses->weight) {
615 entry->next = hmap.heavy_uses;
616 hmap.heavy_uses = entry;
/* Otherwise walk the list to the first node whose successor is not heavier
 * than the entry and link the entry in front of that successor. */
618 for (p = hmap.heavy_uses; p->next; p = p->next) {
619 if (entry->weight >= p->next->weight) {
620 entry->next = p->next;
635 /* Print some information about the list. */
636 DB((dbg, LEVEL_2, "-----------------\n"));
637 for (entry_t *entry = hmap.heavy_uses; entry; entry = entry->next) {
638 DB((dbg, LEVEL_2, "\nweight: is %f\n", entry->weight));
639 DB((dbg, LEVEL_2, "Call for Method %E\n", entry->q.ent));
640 DB((dbg, LEVEL_2, "Position %zu\n", entry->q.pos));
641 DB((dbg, LEVEL_2, "Value %T\n", entry->q.tv));
/* The head of the list is the heaviest entry: clone its method, take the
 * entry off the list and redirect all of its calls to the clone. */
644 entry_t *const entry = hmap.heavy_uses;
646 quadruple_t *qp = &entry->q;
648 ir_entity *ent = clone_method(qp);
649 DB((dbg, LEVEL_1, "Cloned <%+F, %zu, %T> info %+F\n", qp->ent, qp->pos, qp->tv, ent));
651 hmap.heavy_uses = entry->next;
653 /* We must exchange the copies of this call in all clones too.*/
654 exchange_calls(&entry->q, ent);
658 * after we exchanged all calls, some entries on the list for
659 * the next cloned entity may get invalid, so we have to check
660 * them and may even update the list of heavy uses.
662 reorder_weights(&hmap, threshold);
/* Passing NULL releases every object on the obstack (assuming the usual
 * obstack_free() semantics). */
665 obstack_free(&hmap.obst, NULL);
/* Context of the ir_prog pass. Code in this file uses two members: `pass`
 * (its address is handed to def_prog_pass_constructor()) and `threshold`
 * (forwarded to proc_cloning()). */
668 typedef struct pass_t {
674 * Wrapper to run proc_cloning() as an ir_prog pass.
/* Pass callback: interprets context as the pass_t created by
 * proc_cloning_pass() and runs proc_cloning() with its stored threshold. */
676 static int proc_cloning_wrapper(ir_prog *irp, void *context)
678 pass_t *pass = (pass_t*)context;
681 proc_cloning(pass->threshold);
685 /* create a ir_prog pass */
/*
 * Creates the ir_prog pass that runs procedure cloning.
 *
 * @param name       name of the pass; NULL selects the default "cloning"
 * @param threshold  weight threshold later passed to proc_cloning()
 * @return the pass object returned by def_prog_pass_constructor()
 */
686 ir_prog_pass_t *proc_cloning_pass(const char *name, float threshold)
688 pass_t *pass = XMALLOCZ(pass_t);
690 pass->threshold = threshold;
691 return def_prog_pass_constructor(
692 &pass->pass, name ? name : "cloning", proc_cloning_wrapper);