2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Spillslot coalescer.
23 * @author Matthias Braun
36 #include "unionfind.h"
42 #include "bespillslots.h"
43 #include "bechordal_t.h"
44 #include "bestatevent.h"
46 #include "beintlive_t.h"
50 #define DBG_COALESCING 1
51 #define DBG_INTERFERENCES 2
53 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
55 typedef struct spill_t {
57 const ir_mode *mode; /**< mode of the spilled value */
58 int alignment; /**< alignment for the spilled value */
62 typedef struct affinity_edge_t {
74 affinity_edge_t **affinity_edges;
76 set_frame_entity_func set_frame_entity;
77 bool at_begin; /**< frame entities should be allocate at
78 the beginning of the stackframe */
81 /** Compare 2 affinity edges (used in quicksort) */
82 static int cmp_affinity(const void *d1, const void *d2)
84 const affinity_edge_t * const *e1 = (const affinity_edge_t**)d1;
85 const affinity_edge_t * const *e2 = (const affinity_edge_t**)d2;
86 double aff1 = (*e1)->affinity;
87 double aff2 = (*e2)->affinity;
89 /* sort in descending order */
92 } else if (aff1 > aff2) {
95 int slot11 = (*e1)->slot1;
96 int slot21 = (*e2)->slot1;
97 if (slot11 < slot21) {
99 } else if (slot11 > slot21) {
102 int slot12 = (*e1)->slot2;
103 int slot22 = (*e2)->slot2;
104 return (slot12<slot22) - (slot12<slot22);
109 static spill_t *get_spill(be_fec_env_t *env, ir_node *node)
111 assert(rbitset_is_set(env->spills_set, get_irn_idx(node)));
112 return (spill_t*)get_irn_link(node);
115 static inline ir_node *get_memory_edge(const ir_node *node)
119 arity = get_irn_arity(node);
120 for (i = arity - 1; i >= 0; --i) {
121 ir_node *arg = get_irn_n(node, i);
122 if (get_irn_mode(arg) == mode_M)
129 static spill_t *collect_spill(be_fec_env_t *env, ir_node *node,
130 const ir_mode *mode, int align)
134 /* already in spill set? */
135 unsigned idx = get_irn_idx(node);
136 if (rbitset_is_set(env->spills_set, idx)) {
137 spill_t *spill = get_spill(env, node);
138 assert(spill->mode == mode);
139 assert(spill->alignment == align);
142 rbitset_set(env->spills_set, idx);
144 spill = OALLOC(&env->obst, spill_t);
145 /* insert into set of spills if not already there */
148 spill->alignment = align;
149 spill->spillslot = (int)ARR_LEN(env->spills);
150 ARR_APP1(spill_t*, env->spills, spill);
151 set_irn_link(node, spill);
152 DB((dbg, DBG_COALESCING, "Slot %d: %+F\n", spill->spillslot, node));
155 const ir_exec_freq *exec_freq = be_get_irg_exec_freq(env->irg);
156 int arity = get_irn_arity(node);
158 for (i = 0; i < arity; ++i) {
159 affinity_edge_t *affinty_edge;
160 ir_node *arg = get_irn_n(node, i);
161 spill_t *arg_spill = collect_spill(env, arg, mode, align);
162 ir_node *block = get_nodes_block(arg);
164 /* add an affinity edge */
165 affinty_edge = OALLOC(&env->obst, affinity_edge_t);
166 affinty_edge->affinity = get_block_execfreq(exec_freq, block);
167 affinty_edge->slot1 = spill->spillslot;
168 affinty_edge->slot2 = arg_spill->spillslot;
169 ARR_APP1(affinity_edge_t*, env->affinity_edges, affinty_edge);
176 void be_node_needs_frame_entity(be_fec_env_t *env, ir_node *node,
177 const ir_mode *mode, int align)
179 ir_node *spillnode = get_memory_edge(node);
180 assert(spillnode != NULL);
182 /* walk upwards and collect all phis and spills on this way */
183 collect_spill(env, spillnode, mode, align);
185 ARR_APP1(ir_node *, env->reloads, node);
188 static int merge_interferences(be_fec_env_t *env, bitset_t** interferences,
189 int* spillslot_unionfind, int s1, int s2)
195 /* merge spillslots and interferences */
196 res = uf_union(spillslot_unionfind, s1, s2);
197 /* we assume that we always merge s2 to s1 so swap s1, s2 if necessary */
204 bitset_or(interferences[s1], interferences[s2]);
206 /* update other interferences */
207 spillcount = ARR_LEN(env->spills);
208 for (i = 0; i < spillcount; ++i) {
209 bitset_t *intfs = interferences[i];
210 if (bitset_is_set(intfs, s2))
211 bitset_set(intfs, s1);
217 static int my_values_interfere2(ir_graph *irg, const ir_node *a,
220 be_lv_t *lv = be_get_irg_liveness(irg);
222 int a2b = _value_dominates(a, b);
223 int b2a = _value_dominates(b, a);
225 /* If there is no dominance relation, they do not interfere. */
226 if ((a2b | b2a) > 0) {
227 const ir_edge_t *edge;
231 * Adjust a and b so, that a dominates b if
232 * a dominates b or vice versa.
235 const ir_node *t = a;
240 bb = get_nodes_block(b);
243 * If a is live end in b's block it is
244 * live at b's definition (a dominates b)
246 if (be_is_live_end(lv, bb, a))
250 * Look at all usages of a.
251 * If there's one usage of a in the block of b, then
252 * we check, if this use is dominated by b, if that's true
253 * a and b interfere. Note that b must strictly dominate the user,
254 * since if b is the last user of in the block, b and a do not
256 * Uses of a not in b's block can be disobeyed, because the
257 * check for a being live at the end of b's block is already
260 foreach_out_edge(a, edge) {
261 const ir_node *user = get_edge_src_irn(edge);
263 const ir_edge_t *edge2;
264 foreach_out_edge(user, edge2) {
265 const ir_node *user2 = get_edge_src_irn(edge2);
266 assert(!is_Sync(user2));
267 if (get_nodes_block(user2) == bb && !is_Phi(user2) &&
268 _value_strictly_dominates(b, user2))
272 if (get_nodes_block(user) == bb && !is_Phi(user) &&
273 _value_strictly_dominates(b, user))
283 * same as values_interfere but with special handling for Syncs
285 static int my_values_interfere(ir_graph *irg, ir_node *a, ir_node *b)
288 int i, arity = get_irn_arity(a);
289 for (i = 0; i < arity; ++i) {
290 ir_node *in = get_irn_n(a, i);
291 if (my_values_interfere(irg, in, b))
295 } else if (is_Sync(b)) {
296 int i, arity = get_irn_arity(b);
297 for (i = 0; i < arity; ++i) {
298 ir_node *in = get_irn_n(b, i);
299 /* a is not a sync, so no need for my_values_interfere */
300 if (my_values_interfere2(irg, a, in))
306 return my_values_interfere2(irg, a, b);
310 * A greedy coalescing algorithm for spillslots:
311 * 1. Sort the list of affinity edges
312 * 2. Try to merge slots with affinity edges (most expensive slots first)
313 * 3. Try to merge everything else that is possible
315 static void do_greedy_coalescing(be_fec_env_t *env)
317 spill_t **spills = env->spills;
318 size_t spillcount = ARR_LEN(spills);
320 size_t affinity_edge_count;
321 bitset_t **interferences;
322 int* spillslot_unionfind;
330 DB((dbg, DBG_COALESCING, "Coalescing %d spillslots\n", spillcount));
332 interferences = OALLOCN(&data, bitset_t*, spillcount);
333 spillslot_unionfind = OALLOCN(&data, int, spillcount);
335 uf_init(spillslot_unionfind, spillcount);
337 for (i = 0; i < spillcount; ++i) {
338 interferences[i] = bitset_obstack_alloc(&data, spillcount);
341 /* construct interferences */
342 for (i = 0; i < spillcount; ++i) {
344 ir_node *spill1 = spills[i]->spill;
345 if (is_NoMem(spill1))
348 for (i2 = i+1; i2 < spillcount; ++i2) {
349 ir_node *spill2 = spills[i2]->spill;
350 if (is_NoMem(spill2))
353 if (my_values_interfere(env->irg, spill1, spill2)) {
354 DB((dbg, DBG_INTERFERENCES,
355 "Slot %d and %d interfere\n", i, i2));
357 bitset_set(interferences[i], i2);
358 bitset_set(interferences[i2], i);
363 /* sort affinity edges */
364 affinity_edge_count = ARR_LEN(env->affinity_edges);
365 qsort(env->affinity_edges, affinity_edge_count,
366 sizeof(env->affinity_edges[0]), cmp_affinity);
368 /* try to merge affine nodes */
369 for (i = 0; i < affinity_edge_count; ++i) {
370 const affinity_edge_t *edge = env->affinity_edges[i];
371 int s1 = uf_find(spillslot_unionfind, edge->slot1);
372 int s2 = uf_find(spillslot_unionfind, edge->slot2);
374 /* test if values interfere */
375 if (bitset_is_set(interferences[s1], s2)) {
376 assert(bitset_is_set(interferences[s2], s1));
380 DB((dbg, DBG_COALESCING,
381 "Merging %d and %d because of affinity edge\n", s1, s2));
383 merge_interferences(env, interferences, spillslot_unionfind, s1, s2);
386 /* try to merge as much remaining spillslots as possible */
387 for (i = 0; i < spillcount; ++i) {
389 int s1 = uf_find(spillslot_unionfind, i);
393 for (i2 = i+1; i2 < spillcount; ++i2) {
394 int s2 = uf_find(spillslot_unionfind, i2);
398 /* test if values interfere
399 * we have to test n1-n2 and n2-n1, because only 1 side gets updated
400 * when node merging occurs
402 if (bitset_is_set(interferences[s1], s2)) {
403 assert(bitset_is_set(interferences[s2], s1));
407 DB((dbg, DBG_COALESCING,
408 "Merging %d and %d because it is possible\n", s1, s2));
410 if (merge_interferences(env, interferences, spillslot_unionfind, s1, s2) != 0) {
411 /* we can break the loop here, because s2 is the new supernode
412 * now and we'll test s2 again later anyway */
418 /* assign spillslots to spills */
419 for (i = 0; i < spillcount; ++i) {
420 spills[i]->spillslot = uf_find(spillslot_unionfind, i);
423 obstack_free(&data, 0);
426 typedef struct spill_slot_t {
432 typedef struct memperm_entry_t {
437 struct memperm_entry_t *next;
440 typedef struct memperm_t {
443 memperm_entry_t *entries;
446 static int cmp_memperm(const void* d1, const void* d2, size_t size)
448 const memperm_t* e1 = (const memperm_t*)d1;
449 const memperm_t* e2 = (const memperm_t*)d2;
452 return e1->block != e2->block;
455 static memperm_t *get_memperm(be_fec_env_t *env, ir_node *block)
457 memperm_t entry, *res;
461 hash = hash_irn(block);
463 res = (memperm_t*)set_find(env->memperms, &entry, sizeof(entry), hash);
466 entry.entrycount = 0;
467 entry.entries = NULL;
468 res = (memperm_t*)set_insert(env->memperms, &entry, sizeof(entry), hash);
474 static ir_entity* create_stack_entity(be_fec_env_t *env, spill_slot_t *slot)
476 ir_graph *irg = env->irg;
477 ir_type *frame = get_irg_frame_type(irg);
478 ir_entity *res = frame_alloc_area(frame, slot->size, slot->align,
486 * Enlarges a spillslot (if necessary) so that it can carry a value of size
487 * @p othersize and alignment @p otheralign.
489 static void enlarge_spillslot(spill_slot_t *slot, int otheralign, int othersize)
491 if (othersize > slot->size) {
492 slot->size = othersize;
494 if (otheralign > slot->align) {
495 if (otheralign % slot->align != 0)
496 slot->align *= otheralign;
498 slot->align = otheralign;
499 } else if (slot->align % otheralign != 0) {
500 slot->align *= otheralign;
504 static void assign_spill_entity(be_fec_env_t *env,
505 ir_node *node, ir_entity *entity)
512 arity = get_irn_arity(node);
513 for (i = 0; i < arity; ++i) {
514 ir_node *in = get_irn_n(node, i);
517 assign_spill_entity(env, in, entity);
522 /* beware: we might have Stores with Memory Proj's, ia32 fisttp for
524 node = skip_Proj(node);
525 assert(arch_get_frame_entity(node) == NULL);
526 env->set_frame_entity(node, entity);
530 * Create stack entities for the spillslots and assign them to the spill and
533 static void assign_spillslots(be_fec_env_t *env)
535 spill_t **spills = env->spills;
536 size_t spillcount = ARR_LEN(spills);
537 spill_slot_t *spillslots = ALLOCANZ(spill_slot_t, spillcount);
540 /* construct spillslots */
541 for (s = 0; s < spillcount; ++s) {
542 const spill_t *spill = spills[s];
543 int slotid = spill->spillslot;
544 const ir_mode *mode = spill->mode;
545 spill_slot_t *slot = & (spillslots[slotid]);
546 int size = get_mode_size_bytes(mode);
547 int align = spill->alignment;
549 if (slot->align == 0 && slot->size == 0) {
553 enlarge_spillslot(slot, align, size);
557 for (s = 0; s < spillcount; ++s) {
558 const spill_t *spill = spills[s];
559 ir_node *node = spill->spill;
560 int slotid = spill->spillslot;
561 spill_slot_t *slot = &spillslots[slotid];
563 if (slot->entity == NULL) {
564 create_stack_entity(env, slot);
568 int arity = get_irn_arity(node);
570 ir_node *block = get_nodes_block(node);
572 /* should be a PhiM */
573 assert(get_irn_mode(node) == mode_M);
575 for (i = 0; i < arity; ++i) {
576 ir_node *arg = get_irn_n(node, i);
577 ir_node *predblock = get_Block_cfgpred_block(block, i);
578 spill_t *argspill = get_spill(env, arg);
579 int argslotid = argspill->spillslot;
581 if (slotid != argslotid) {
583 memperm_entry_t *entry;
584 spill_slot_t *argslot = &spillslots[argslotid];
585 if (argslot->entity == NULL) {
586 create_stack_entity(env, argslot);
589 memperm = get_memperm(env, predblock);
591 entry = OALLOC(&env->obst, memperm_entry_t);
594 entry->in = argslot->entity;
595 entry->out = slot->entity;
596 entry->next = memperm->entries;
597 memperm->entrycount++;
598 memperm->entries = entry;
602 assign_spill_entity(env, node, slot->entity);
606 for (s = 0; s < ARR_LEN(env->reloads); ++s) {
607 ir_node *reload = env->reloads[s];
608 ir_node *spillnode = get_memory_edge(reload);
609 const spill_t *spill = get_spill(env, spillnode);
610 const spill_slot_t *slot = &spillslots[spill->spillslot];
612 assert(slot->entity != NULL);
614 env->set_frame_entity(reload, slot->entity);
619 * Returns the last node in a block which is no control flow changing node
621 static ir_node *get_end_of_block_insertion_point(ir_node* block)
623 ir_node* ins = sched_last(block);
624 while (is_Proj(ins) && get_irn_mode(ins) == mode_X) {
625 ins = sched_prev(ins);
631 ir_node *prev = sched_prev(ins);
641 static void create_memperms(be_fec_env_t *env)
645 foreach_set(env->memperms, memperm_t*, memperm) {
646 ir_node **nodes = ALLOCAN(ir_node*, memperm->entrycount);
647 memperm_entry_t *entry;
649 ir_node *mempermnode;
652 assert(memperm->entrycount > 0);
654 for (entry = memperm->entries, i = 0; entry != NULL; entry = entry->next, ++i) {
655 ir_node* arg = get_irn_n(entry->node, entry->pos);
659 mempermnode = be_new_MemPerm(memperm->block, memperm->entrycount,
662 /* insert node into schedule */
663 blockend = get_end_of_block_insertion_point(memperm->block);
664 sched_add_before(blockend, mempermnode);
665 stat_ev_dbl("mem_perm", memperm->entrycount);
668 for (entry = memperm->entries; entry != NULL; entry = entry->next, ++i) {
670 ir_node* arg = get_irn_n(entry->node, entry->pos);
672 be_set_MemPerm_in_entity(mempermnode, i, entry->in);
673 be_set_MemPerm_out_entity(mempermnode, i, entry->out);
674 proj = new_r_Proj(mempermnode, get_irn_mode(arg), i);
676 set_irn_n(entry->node, entry->pos, proj);
681 static unsigned count_spillslots(const be_fec_env_t *env)
683 size_t spillcount = ARR_LEN(env->spills);
684 unsigned slotcount = 0;
688 rbitset_alloca(counted, spillcount);
690 for (s = 0; s < spillcount; ++s) {
691 spill_t *spill = env->spills[s];
692 int spillslot = spill->spillslot;
693 if (!rbitset_is_set(counted, spillslot)) {
695 rbitset_set(counted, spillslot);
702 be_fec_env_t *be_new_frame_entity_coalescer(ir_graph *irg)
704 be_fec_env_t *env = XMALLOCZ(be_fec_env_t);
706 be_assure_live_chk(irg);
708 obstack_init(&env->obst);
710 env->spills = NEW_ARR_F(spill_t*, 0);
711 env->spills_set = rbitset_malloc(get_irg_last_idx(irg));
712 env->reloads = NEW_ARR_F(ir_node*, 0);
713 env->affinity_edges = NEW_ARR_F(affinity_edge_t*, 0);
714 env->memperms = new_set(cmp_memperm, 10);
716 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
721 void be_free_frame_entity_coalescer(be_fec_env_t *env)
723 ir_free_resources(env->irg, IR_RESOURCE_IRN_LINK);
725 del_set(env->memperms);
726 DEL_ARR_F(env->reloads);
727 DEL_ARR_F(env->affinity_edges);
728 DEL_ARR_F(env->spills);
729 xfree(env->spills_set);
730 obstack_free(&env->obst, NULL);
735 void be_assign_entities(be_fec_env_t *env,
736 set_frame_entity_func set_frame_entity,
737 bool alloc_entities_at_begin)
739 env->set_frame_entity = set_frame_entity;
740 env->at_begin = alloc_entities_at_begin;
742 if (stat_ev_enabled) {
743 stat_ev_dbl("spillslots", ARR_LEN(env->spills));
746 if (be_coalesce_spill_slots) {
747 do_greedy_coalescing(env);
750 if (stat_ev_enabled) {
751 stat_ev_dbl("spillslots_after_coalescing", count_spillslots(env));
754 assign_spillslots(env);
756 create_memperms(env);
759 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillslots)
760 void be_init_spillslots(void)
762 FIRM_DBG_REGISTER(dbg, "firm.be.spillslots");