2 * Copyright (C) 1995-2012 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Generic hashset implementation
23 * @author Matthias Braun, inspiration from densehash from google sparsehash
28 * You have to specialize this file by defining:
31 * <li><b>HashSet</b> The name of the hashset type</li>
32 * <li><b>HashSetIterator</b> The name of the hashset iterator type</li>
33 * <li><b>ValueType</b> The type of the stored data values</li>
34 * <li><b>NullValue</b> A special value representing no values</li>
35 * <li><b>DeletedValue</b> A special value representing deleted entries</li>
36 * <li><b>Hash(hashset,key)</b> calculates the hash value for a given key</li>
39 * Note that by default it is assumed that the data values themselves are used
40 * as keys. However you can change that with additional defines:
43 * <li><b>KeyType</b> The type of the keys identifying data values.
44 * Defining this implies, that a data value contains
45 * more than just the key.</li>
46 * <li><b>GetKey(value)</b> Extracts the key from a data value</li>
47 * <li><b>KeysEqual(hashset,key1,key2)</b> Tests wether 2 keys are equal</li>
48 * <li><b>DO_REHASH</b> Instead of storing the hash-values, recalculate
49 * them on demand from the datavalues. (useful if
50 * calculating the hash-values takes less time than
51 * a memory access)</li>
54 * You can further fine tune your hashset by defining the following:
57 * <li><b>JUMP(num_probes)</b> The probing method</li>
58 * <li><b>Alloc(count)</b> Allocates count hashset entries (NOT bytes)</li>
59 * <li><b>Free(ptr)</b> Frees a block of memory allocated by Alloc</li>
60 * <li><b>SetRangeEmpty(ptr,count)</b> Efficiently sets a range of elements to
62 * <li><b>ADDITIONAL_DATA<b> Additional fields appended to the hashset struct</li>
71 #include "bitfiddle.h"
74 /* quadratic probing */
76 #define JUMP(num_probes) (num_probes)
81 #define Hash(self,key) ((unsigned)(((char *)key) - (char *)0))
85 #define HashSetEntry ValueType
86 #define EntrySetHash(entry,new_hash) ((void)0)
87 #define EntryGetHash(self,entry) Hash(self, GetKey(entry))
88 #define EntryGetValue(entry) (entry)
89 #else /* ! DO_REHASH */
90 #define EntryGetHash(self,entry) (entry).hash
91 #define EntrySetHash(entry,new_hash) (entry).hash = (new_hash)
92 #define EntryGetValue(entry) (entry).data
93 #endif /* DO_REHASH */
97 #define Alloc(size) XMALLOCN(HashSetEntry, (size))
98 #define Free(ptr) free(ptr)
102 #define FindReturnValue bool
103 #define GetFindReturnValue(entry,found) (found)
104 #define NullReturnValue false
105 #define InsertReturnValue(findreturn) !(findreturn)
106 #else /* ! ID_HASH */
108 #define FindReturnValue ValueType
109 #define GetFindReturnValue(entry,found) EntryGetValue(entry)
110 #define NullReturnValue NullValue
112 #define FindReturnValue ValueType*
113 #define GetFindReturnValue(entry,found) & EntryGetValue(entry)
114 #define NullReturnValue & NullValue
118 #ifndef InsertReturnValue
119 #define InsertReturnValue(findreturn) findreturn
123 #define KeyType ValueType
124 #define GetKey(value) (value)
125 #define InitData(self,value,key) (value) = (key)
129 #define ConstKeyType const KeyType
130 #endif /* ConstKeyType */
132 #ifndef EntrySetEmpty
133 #define EntrySetEmpty(entry) EntryGetValue(entry) = NullValue
134 #endif /* EntrySetEmpty */
135 #ifndef EntrySetDeleted
136 #define EntrySetDeleted(entry) EntryGetValue(entry) = DeletedValue
137 #endif /* EntrySetDeleted */
139 #define EntryIsEmpty(entry) (EntryGetValue(entry) == NullValue)
140 #endif /* EntryIsEmpty */
141 #ifndef EntryIsDeleted
142 #define EntryIsDeleted(entry) (EntryGetValue(entry) == DeletedValue)
143 #endif /* EntryIsDeleted */
144 #ifndef SetRangeEmpty
145 #define SetRangeEmpty(ptr,size) \
148 size_t _size = (size); \
149 HashSetEntry *entries = (ptr); \
150 for (_i = 0; _i < _size; ++_i) { \
151 HashSetEntry *entry = & entries[_i]; \
152 EntrySetEmpty(*entry); \
155 #endif /* SetRangeEmpty */
157 #ifndef HT_OCCUPANCY_FLT
158 /** how full before we double size */
159 #define HT_OCCUPANCY_FLT(x) ((x)/2)
160 #endif /* HT_OCCUPANCY_FLT */
161 #ifndef HT_1_DIV_OCCUPANCY_FLT
162 #define HT_1_DIV_OCCUPANCY_FLT 2
166 /** how empty before we half size */
167 #define HT_EMPTY_FLT(x) ((x)/5)
168 #endif /* HT_EMPTY_FLT */
170 #ifndef HT_MIN_BUCKETS
171 /** default smallest bucket size */
172 #define HT_MIN_BUCKETS 32
173 #endif /* HT_MIN_BUCKETS */
175 #define ILLEGAL_POS ((size_t)-1)
179 * Returns the number of elements in the hashset
181 size_t hashset_size(const HashSet *self)
183 return self->num_elements - self->num_deleted;
186 static inline size_t hashset_size(const HashSet *self)
188 return self->num_elements - self->num_deleted;
193 * Inserts an element into a hashset without growing the set (you have to make
194 * sure there's enough room for that.
195 * @returns previous value if found, NullValue otherwise
196 * @note also see comments for hashset_insert()
199 static inline FindReturnValue insert_nogrow(HashSet *self, KeyType key)
201 size_t num_probes = 0;
202 size_t num_buckets = self->num_buckets;
203 size_t hashmask = num_buckets - 1;
204 unsigned hash = Hash(self, key);
205 size_t bucknum = hash & hashmask;
206 size_t insert_pos = ILLEGAL_POS;
208 assert((num_buckets & (num_buckets - 1)) == 0);
211 HashSetEntry *entry = & self->entries[bucknum];
213 if (EntryIsEmpty(*entry)) {
215 HashSetEntry *nentry;
217 if (insert_pos != ILLEGAL_POS) {
223 nentry = &self->entries[p];
224 InitData(self, EntryGetValue(*nentry), key);
225 EntrySetHash(*nentry, hash);
226 self->num_elements++;
227 return GetFindReturnValue(*nentry, false);
229 if (EntryIsDeleted(*entry)) {
230 if (insert_pos == ILLEGAL_POS)
231 insert_pos = bucknum;
232 } else if (EntryGetHash(self, *entry) == hash) {
233 if (KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
234 // Value already in the set, return it
235 return GetFindReturnValue(*entry, true);
240 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
241 assert(num_probes < num_buckets);
246 * calculate shrink and enlarge limits
249 static inline void reset_thresholds(HashSet *self)
251 self->enlarge_threshold = (size_t) HT_OCCUPANCY_FLT(self->num_buckets);
252 self->shrink_threshold = (size_t) HT_EMPTY_FLT(self->num_buckets);
253 self->consider_shrink = 0;
256 #ifndef HAVE_OWN_RESIZE
258 * Inserts an element into a hashset under the assumption that the hashset
259 * contains no deleted entries and the element doesn't exist in the hashset yet.
262 static void insert_new(HashSet *self, unsigned hash, ValueType value)
264 size_t num_probes = 0;
265 size_t num_buckets = self->num_buckets;
266 size_t hashmask = num_buckets - 1;
267 size_t bucknum = hash & hashmask;
268 size_t insert_pos = ILLEGAL_POS;
270 //assert(value != NullValue);
273 HashSetEntry *entry = & self->entries[bucknum];
275 if (EntryIsEmpty(*entry)) {
277 HashSetEntry *nentry;
279 if (insert_pos != ILLEGAL_POS) {
284 nentry = &self->entries[p];
286 EntryGetValue(*nentry) = value;
287 EntrySetHash(*nentry, hash);
288 self->num_elements++;
291 assert(!EntryIsDeleted(*entry));
294 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
295 assert(num_probes < num_buckets);
303 static inline void resize(HashSet *self, size_t new_size)
305 size_t num_buckets = self->num_buckets;
307 HashSetEntry *old_entries = self->entries;
308 HashSetEntry *new_entries;
310 /* allocate a new array with double size */
311 new_entries = Alloc(new_size);
312 SetRangeEmpty(new_entries, new_size);
314 /* use the new array */
315 self->entries = new_entries;
316 self->num_buckets = new_size;
317 self->num_elements = 0;
318 self->num_deleted = 0;
320 self->entries_version++;
322 reset_thresholds(self);
324 /* reinsert all elements */
325 for (i = 0; i < num_buckets; ++i) {
326 HashSetEntry *entry = & old_entries[i];
327 if (EntryIsEmpty(*entry) || EntryIsDeleted(*entry))
330 insert_new(self, EntryGetHash(self, *entry), EntryGetValue(*entry));
333 /* now we can free the old array */
338 /* resize must be defined outside */
339 static inline void resize(HashSet *self, size_t new_size);
344 * grow the hashset if adding 1 more elements would make it too crowded
347 static inline void maybe_grow(HashSet *self)
349 if (LIKELY(self->num_elements + 1 <= self->enlarge_threshold))
353 if (self->num_elements - self->num_deleted + 2 > self->enlarge_threshold) {
354 /* double table size */
355 resize_to = self->num_buckets * 2;
356 if (resize_to <= self->num_buckets) {
360 /* no need to resize, we just clean up the deleted entries */
361 resize_to = self->num_buckets;
363 resize(self, resize_to);
367 * shrink the hashset if it is only sparsely filled
370 static inline void maybe_shrink(HashSet *self)
375 if (!self->consider_shrink)
378 self->consider_shrink = 0;
379 size = hashset_size(self);
380 if (size <= HT_MIN_BUCKETS)
383 if (LIKELY(size > self->shrink_threshold))
386 resize_to = ceil_po2(size);
391 resize(self, resize_to);
394 #ifdef hashset_insert
396 * Insert an element into the hashset. If no element with the given key exists yet,
397 * then a new one is created and initialized with the InitData function.
398 * Otherwise the existing element is returned (for hashs where key is equal to
399 * value, nothing is returned.)
401 * @param self the hashset
402 * @param key the key that identifies the data
403 * @returns the existing or newly created data element (or nothing in case of hashs where keys are the while value)
405 FindReturnValue hashset_insert(HashSet *self, KeyType key)
408 self->entries_version++;
413 return InsertReturnValue(insert_nogrow(self, key));
419 * Searches for an element with key @p key.
421 * @param self the hashset
422 * @param key the key to search for
423 * @returns the found value or NullValue if nothing was found
425 FindReturnValue hashset_find(const HashSet *self, ConstKeyType key)
427 size_t num_probes = 0;
428 size_t num_buckets = self->num_buckets;
429 size_t hashmask = num_buckets - 1;
430 unsigned hash = Hash(self, key);
431 size_t bucknum = hash & hashmask;
434 HashSetEntry *entry = & self->entries[bucknum];
436 if (EntryIsEmpty(*entry)) {
437 return NullReturnValue;
439 if (EntryIsDeleted(*entry)) {
441 } else if (EntryGetHash(self, *entry) == hash) {
442 if (KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
444 return GetFindReturnValue(*entry, true);
449 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
450 assert(num_probes < num_buckets);
455 #ifdef hashset_remove
457 * Removes an element from a hashset. Does nothing if the set doesn't contain
460 * @param self the hashset
461 * @param key key that identifies the data to remove
463 void hashset_remove(HashSet *self, ConstKeyType key)
465 size_t num_probes = 0;
466 size_t num_buckets = self->num_buckets;
467 size_t hashmask = num_buckets - 1;
468 unsigned hash = Hash(self, key);
469 size_t bucknum = hash & hashmask;
472 self->entries_version++;
476 HashSetEntry *entry = & self->entries[bucknum];
478 if (EntryIsEmpty(*entry)) {
481 if (EntryIsDeleted(*entry)) {
483 } else if (EntryGetHash(self, *entry) == hash) {
484 if (KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
485 EntrySetDeleted(*entry);
487 self->consider_shrink = 1;
493 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
494 assert(num_probes < num_buckets);
500 * Initializes hashset with a specific size
503 static inline void init_size(HashSet *self, size_t initial_size)
505 if (initial_size < 4)
508 self->entries = Alloc(initial_size);
509 SetRangeEmpty(self->entries, initial_size);
510 self->num_buckets = initial_size;
511 self->consider_shrink = 0;
512 self->num_elements = 0;
513 self->num_deleted = 0;
515 self->entries_version = 0;
517 #ifdef ADDITIONAL_INIT
521 reset_thresholds(self);
526 * Initializes a hashset with the default size. The memory for the set has to
529 void hashset_init(HashSet *self)
531 init_size(self, HT_MIN_BUCKETS);
535 #ifdef hashset_destroy
537 * Destroys a hashset, freeing all used memory (except the memory for the
538 * HashSet struct itself).
540 void hashset_destroy(HashSet *self)
542 #ifdef ADDITIONAL_TERM
547 self->entries = NULL;
552 #ifdef hashset_init_size
554 * Initializes a hashset expecting expected_element size.
556 void hashset_init_size(HashSet *self, size_t expected_elements)
561 if (expected_elements >= UINT_MAX/2) {
565 needed_size = expected_elements * HT_1_DIV_OCCUPANCY_FLT;
566 po2size = ceil_po2(needed_size);
567 init_size(self, po2size);
571 #ifdef hashset_iterator_init
573 * Initializes a hashset iterator. The memory for the allocator has to be
575 * @note it is not allowed to remove or insert elements while iterating
577 void hashset_iterator_init(HashSetIterator *self, const HashSet *hashset)
579 self->current_bucket = hashset->entries - 1;
580 self->end = hashset->entries + hashset->num_buckets;
583 self->entries_version = hashset->entries_version;
588 #ifdef hashset_iterator_next
590 * Returns the next value in the iterator or NULL if no value is left
592 * @note it is not allowed to remove or insert elements while iterating
594 ValueType hashset_iterator_next(HashSetIterator *self)
596 HashSetEntry *current_bucket = self->current_bucket;
597 HashSetEntry *end = self->end;
599 /* using hashset_insert or hashset_remove is not allowed while iterating */
600 assert(self->entries_version == self->set->entries_version);
604 if (current_bucket >= end)
606 } while (EntryIsEmpty(*current_bucket) || EntryIsDeleted(*current_bucket));
608 self->current_bucket = current_bucket;
609 return EntryGetValue(*current_bucket);
613 #ifdef hashset_remove_iterator
615 * Removes the element the iterator points to. Removing an element a second time
618 void hashset_remove_iterator(HashSet *self, const HashSetIterator *iter)
620 HashSetEntry *entry = iter->current_bucket;
622 /* iterator_next needs to have been called at least once */
623 assert(entry >= self->entries);
624 /* needs to be on a valid element */
625 assert(entry < self->entries + self->num_buckets);
627 if (EntryIsDeleted(*entry))
630 EntrySetDeleted(*entry);
632 self->consider_shrink = 1;